From 7ee0ee78437d3afb77cdd559ca395924061e1f5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 10 Oct 2025 01:50:33 +0200 Subject: [PATCH] chore!: remove model mgmt from CLI for Hugging Face CLI (#3700) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change removes the `llama model` and `llama download` subcommands from the CLI, replacing them with recommendations to use the Hugging Face CLI instead. Rationale for this change: - The model management functionality was largely duplicating what Hugging Face CLI already provides, leading to unnecessary maintenance overhead (except the download source from Meta?) - Maintaining our own implementation required fixing bugs and keeping up with changes in model repositories and download mechanisms - The Hugging Face CLI is more mature, widely adopted, and better maintained - This allows us to focus on the core Llama Stack functionality rather than reimplementing model management tools Changes made: - Removed all model-related CLI commands and their implementations - Updated documentation to recommend using `huggingface-cli` for model downloads - Removed Meta-specific download logic and statements - Simplified the CLI to focus solely on stack management operations Users should now use: - `huggingface-cli download` for downloading models - `huggingface-cli scan-cache` for listing downloaded models This is a breaking change as it removes previously available CLI commands. Signed-off-by: Sébastien Han --- .github/workflows/python-build-test.yml | 2 - README.md | 2 +- .../self_hosted_distro/meta-reference-gpu.md | 26 +- .../llama_cli_reference/download_models.md | 145 +---- .../references/llama_cli_reference/index.md | 238 +-------- docs/getting_started_llama4.ipynb | 4 +- llama_stack/cli/download.py | 495 ------------------ llama_stack/cli/llama.py | 6 - llama_stack/cli/model/__init__.py | 7 - llama_stack/cli/model/describe.py | 70 --- llama_stack/cli/model/download.py | 24 - llama_stack/cli/model/list.py | 119 ----- llama_stack/cli/model/model.py | 43 -- llama_stack/cli/model/prompt_format.py | 133 ----- llama_stack/cli/model/remove.py | 68 --- llama_stack/cli/model/safety_models.py | 64 --- llama_stack/cli/model/verify_download.py | 24 - llama_stack/cli/verify_download.py | 141 ----- .../meta-reference-gpu/doc_template.md | 26 +- pyproject.toml | 34 +- uv.lock | 4 +- 21 files changed, 63 insertions(+), 1612 deletions(-) delete mode 100644 llama_stack/cli/download.py delete mode 100644 llama_stack/cli/model/__init__.py delete mode 100644 llama_stack/cli/model/describe.py delete mode 100644 llama_stack/cli/model/download.py delete mode 100644 llama_stack/cli/model/list.py delete mode 100644 llama_stack/cli/model/model.py delete mode 100644 llama_stack/cli/model/prompt_format.py delete mode 100644 llama_stack/cli/model/remove.py delete mode 100644 llama_stack/cli/model/safety_models.py delete mode 100644 llama_stack/cli/model/verify_download.py delete mode 100644 llama_stack/cli/verify_download.py diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml index fca7c4b4c..c6c443811 100644 --- a/.github/workflows/python-build-test.yml +++ b/.github/workflows/python-build-test.yml @@ -43,7 +43,5 @@ jobs: uv pip list uv pip show llama-stack command -v llama - llama model prompt-format -m Llama3.2-90B-Vision-Instruct - llama model list llama stack list-apis llama stack list-providers inference diff --git a/README.md b/README.md index 
9cb9e32fc..75e9989d7 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ pip install -U llama_stack MODEL="Llama-4-Scout-17B-16E-Instruct" # get meta url from llama.com -llama model download --source meta --model-id $MODEL --meta-url +huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL # start a llama stack server INFERENCE_MODEL=meta-llama/$MODEL llama stack build --run --template meta-reference-gpu diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md index 1c0ef5f6e..403a31667 100644 --- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md @@ -41,31 +41,7 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. - -``` -$ llama model list --downloaded -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ -┃ Model ┃ Size ┃ Modified Time ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ -│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ -└─────────────────────────────────────────┴──────────┴─────────────────────┘ +Please check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models using the Hugging Face CLI. ``` ## Running the Distribution diff --git a/docs/docs/references/llama_cli_reference/download_models.md b/docs/docs/references/llama_cli_reference/download_models.md index a9af65349..542740202 100644 --- a/docs/docs/references/llama_cli_reference/download_models.md +++ b/docs/docs/references/llama_cli_reference/download_models.md @@ -25,141 +25,42 @@ You have two ways to install Llama Stack: cd llama-stack pip install -e . -## Downloading models via CLI +## Downloading models via Hugging Face CLI -You first need to have models downloaded locally. +You first need to have models downloaded locally. 
We recommend using the [Hugging Face CLI](https://huggingface.co/docs/huggingface_hub/guides/cli) to download models. -To download any model you need the **Model Descriptor**. -This can be obtained by running the command -``` -llama model list -``` +### Install Hugging Face CLI -You should see a table like this: - -``` -+----------------------------------+------------------------------------------+----------------+ -| Model Descriptor(ID) | Hugging Face Repo | Context Length | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-8B | meta-llama/Llama-3.1-8B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-70B | meta-llama/Llama-3.1-70B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B:bf16-mp8 | meta-llama/Llama-3.1-405B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B | meta-llama/Llama-3.1-405B-FP8 | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B:bf16-mp16 | meta-llama/Llama-3.1-405B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-8B-Instruct | meta-llama/Llama-3.1-8B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-70B-Instruct | meta-llama/Llama-3.1-70B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B-Instruct:bf16-mp8 | meta-llama/Llama-3.1-405B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B-Instruct | meta-llama/Llama-3.1-405B-Instruct-FP8 | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Llama-3.1-405B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-1B | meta-llama/Llama-3.2-1B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-3B | meta-llama/Llama-3.2-3B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-11B-Vision | meta-llama/Llama-3.2-11B-Vision | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-90B-Vision | meta-llama/Llama-3.2-90B-Vision | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-1B-Instruct | meta-llama/Llama-3.2-1B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-3B-Instruct | meta-llama/Llama-3.2-3B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-11B-Vision-Instruct | meta-llama/Llama-3.2-11B-Vision-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-90B-Vision-Instruct | meta-llama/Llama-3.2-90B-Vision-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| 
Llama-Guard-3-11B-Vision | meta-llama/Llama-Guard-3-11B-Vision | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-1B:int4-mp1 | meta-llama/Llama-Guard-3-1B-INT4 | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-1B | meta-llama/Llama-Guard-3-1B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-8B | meta-llama/Llama-Guard-3-8B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-8B:int8-mp1 | meta-llama/Llama-Guard-3-8B-INT8 | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Prompt-Guard-86M | meta-llama/Prompt-Guard-86M | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-2-8B | meta-llama/Llama-Guard-2-8B | 4K | -+----------------------------------+------------------------------------------+----------------+ -``` - -To download models, you can use the llama download command. - -#### Downloading from [Meta](https://llama.meta.com/llama-downloads/) - -Here is an example download command to get the 3B-Instruct/11B-Vision-Instruct model. You will need META_URL which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/). Note: You need to quote the META_URL - -Download the required checkpoints using the following commands: +First, install the Hugging Face CLI: ```bash -# download the 8B model, this can be run on a single GPU -llama download --source meta --model-id Llama3.2-3B-Instruct --meta-url 'META_URL' - -# you can also get the 70B model, this will require 8 GPUs however -llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url 'META_URL' - -# llama-agents have safety enabled by default. For this, you will need -# safety models -- Llama-Guard and Prompt-Guard -llama download --source meta --model-id Prompt-Guard-86M --meta-url 'META_URL' -llama download --source meta --model-id Llama-Guard-3-1B --meta-url 'META_URL' +pip install huggingface_hub[cli] ``` -#### Downloading from [Hugging Face](https://huggingface.co/meta-llama) +### Download models from Hugging Face -Essentially, the same commands above work, just replace `--source meta` with `--source huggingface`. +You can download models using the `huggingface-cli download` command. Here are some examples: ```bash -llama download --source huggingface --model-id Llama3.1-8B-Instruct --hf-token +# Download Llama 3.2 3B Instruct model +huggingface-cli download meta-llama/Llama-3.2-3B-Instruct --local-dir ~/.llama/Llama-3.2-3B-Instruct -llama download --source huggingface --model-id Llama3.1-70B-Instruct --hf-token +# Download Llama 3.2 1B Instruct model +huggingface-cli download meta-llama/Llama-3.2-1B-Instruct --local-dir ~/.llama/Llama-3.2-1B-Instruct -llama download --source huggingface --model-id Llama-Guard-3-1B --ignore-patterns *original* -llama download --source huggingface --model-id Prompt-Guard-86M --ignore-patterns *original* -``` - -**Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). 
- -```{tip} -Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored. +# Download Llama Guard 3 1B model +huggingface-cli download meta-llama/Llama-Guard-3-1B --local-dir ~/.llama/Llama-Guard-3-1B + +# Download Prompt Guard model +huggingface-cli download meta-llama/Prompt-Guard-86M --local-dir ~/.llama/Prompt-Guard-86M ``` +**Important:** You need to authenticate with Hugging Face to download models. You can do this by: +1. Getting your token from [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) +2. Running `huggingface-cli login` and entering your token ## List the downloaded models -To list the downloaded models with the following command: -``` -llama model list --downloaded -``` - -You should see a table like this: -``` -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ -┃ Model ┃ Size ┃ Modified Time ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ -│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ -└─────────────────────────────────────────┴──────────┴─────────────────────┘ +To list the downloaded models, you can use the Hugging Face CLI: +```bash +# List all downloaded models in your local cache +huggingface-cli scan-cache ``` diff --git a/docs/docs/references/llama_cli_reference/index.md b/docs/docs/references/llama_cli_reference/index.md index 9b71a6795..0bebc601d 100644 --- a/docs/docs/references/llama_cli_reference/index.md +++ b/docs/docs/references/llama_cli_reference/index.md @@ -27,9 +27,9 @@ You have two ways to install Llama Stack: ## `llama` subcommands -1. `download`: Supports downloading models from Meta or Hugging Face. [Downloading models](#downloading-models) -2. `model`: Lists available models and their properties. [Understanding models](#understand-the-models) -3. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../distributions/building_distro) documentation. +1. `stack`: Allows you to build a stack using the `llama stack` distribution and run a Llama Stack server. 
You can read more about how to build a Llama Stack distribution in the [Build your own Distribution](../distributions/building_distro) documentation. + +For downloading models, we recommend using the [Hugging Face CLI](https://huggingface.co/docs/huggingface_hub/guides/cli). See [Downloading models](#downloading-models) for more information. ### Sample Usage @@ -38,239 +38,41 @@ llama --help ``` ``` -usage: llama [-h] {download,model,stack} ... +usage: llama [-h] {stack} ... Welcome to the Llama CLI options: - -h, --help show this help message and exit + -h, --help show this help message and exit subcommands: - {download,model,stack} + {stack} + + stack Operations for the Llama Stack / Distributions ``` ## Downloading models -You first need to have models downloaded locally. +You first need to have models downloaded locally. We recommend using the [Hugging Face CLI](https://huggingface.co/docs/huggingface_hub/guides/cli) to download models. -To download any model you need the **Model Descriptor**. -This can be obtained by running the command -``` -llama model list -``` - -You should see a table like this: - -``` -+----------------------------------+------------------------------------------+----------------+ -| Model Descriptor(ID) | Hugging Face Repo | Context Length | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-8B | meta-llama/Llama-3.1-8B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-70B | meta-llama/Llama-3.1-70B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B:bf16-mp8 | meta-llama/Llama-3.1-405B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B | meta-llama/Llama-3.1-405B-FP8 | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B:bf16-mp16 | meta-llama/Llama-3.1-405B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-8B-Instruct | meta-llama/Llama-3.1-8B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-70B-Instruct | meta-llama/Llama-3.1-70B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B-Instruct:bf16-mp8 | meta-llama/Llama-3.1-405B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B-Instruct | meta-llama/Llama-3.1-405B-Instruct-FP8 | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Llama-3.1-405B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-1B | meta-llama/Llama-3.2-1B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-3B | meta-llama/Llama-3.2-3B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-11B-Vision | meta-llama/Llama-3.2-11B-Vision | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-90B-Vision | meta-llama/Llama-3.2-90B-Vision | 
128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-1B-Instruct | meta-llama/Llama-3.2-1B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-3B-Instruct | meta-llama/Llama-3.2-3B-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-11B-Vision-Instruct | meta-llama/Llama-3.2-11B-Vision-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama3.2-90B-Vision-Instruct | meta-llama/Llama-3.2-90B-Vision-Instruct | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-11B-Vision | meta-llama/Llama-Guard-3-11B-Vision | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-1B:int4-mp1 | meta-llama/Llama-Guard-3-1B-INT4 | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-1B | meta-llama/Llama-Guard-3-1B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-8B | meta-llama/Llama-Guard-3-8B | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-3-8B:int8-mp1 | meta-llama/Llama-Guard-3-8B-INT8 | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Prompt-Guard-86M | meta-llama/Prompt-Guard-86M | 128K | -+----------------------------------+------------------------------------------+----------------+ -| Llama-Guard-2-8B | meta-llama/Llama-Guard-2-8B | 4K | -+----------------------------------+------------------------------------------+----------------+ -``` - -To download models, you can use the `llama download` command. - -### Downloading from [Meta](https://llama.meta.com/llama-downloads/) - -Here is an example download command to get the 3B-Instruct/11B-Vision-Instruct model. You will need META_URL which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/) - -Download the required checkpoints using the following commands: +First, install the Hugging Face CLI: ```bash -# download the 8B model, this can be run on a single GPU -llama download --source meta --model-id Llama3.2-3B-Instruct --meta-url META_URL - -# you can also get the 70B model, this will require 8 GPUs however -llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url META_URL - -# llama-agents have safety enabled by default. For this, you will need -# safety models -- Llama-Guard and Prompt-Guard -llama download --source meta --model-id Prompt-Guard-86M --meta-url META_URL -llama download --source meta --model-id Llama-Guard-3-1B --meta-url META_URL +pip install huggingface_hub[cli] ``` -### Downloading from [Hugging Face](https://huggingface.co/meta-llama) - -Essentially, the same commands above work, just replace `--source meta` with `--source huggingface`. 
- +Then authenticate and download models: ```bash -llama download --source huggingface --model-id Llama3.1-8B-Instruct --hf-token +# Authenticate with Hugging Face +huggingface-cli login -llama download --source huggingface --model-id Llama3.1-70B-Instruct --hf-token - -llama download --source huggingface --model-id Llama-Guard-3-1B --ignore-patterns *original* -llama download --source huggingface --model-id Prompt-Guard-86M --ignore-patterns *original* -``` - -**Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). - -```{tip} -Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored. +# Download a model +huggingface-cli download meta-llama/Llama-3.2-3B-Instruct --local-dir ~/.llama/Llama-3.2-3B-Instruct ``` ## List the downloaded models -To list the downloaded models with the following command: -``` -llama model list --downloaded -``` - -You should see a table like this: -``` -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ -┃ Model ┃ Size ┃ Modified Time ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ -│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ -├─────────────────────────────────────────┼──────────┼─────────────────────┤ -│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ -└─────────────────────────────────────────┴──────────┴─────────────────────┘ -``` - - -## Understand the models -The `llama model` command helps you explore the model’s interface. - -1. `download`: Download the model from different sources. (meta, huggingface) -2. `list`: Lists all the models available for download with hardware requirements for deploying the models. -3. `prompt-format`: Show llama model message formats. -4. `describe`: Describes all the properties of the model. - -### Sample Usage - -`llama model ` - -``` -llama model --help -``` -``` -usage: llama model [-h] {download,list,prompt-format,describe,verify-download,remove} ... 
- -Work with llama models - -options: - -h, --help show this help message and exit - -model_subcommands: - {download,list,prompt-format,describe,verify-download,remove} -``` - -### Describe - -You can use the describe command to know more about a model: -``` -llama model describe -m Llama3.2-3B-Instruct -``` -``` -+-----------------------------+----------------------------------+ -| Model | Llama3.2-3B-Instruct | -+-----------------------------+----------------------------------+ -| Hugging Face ID | meta-llama/Llama-3.2-3B-Instruct | -+-----------------------------+----------------------------------+ -| Description | Llama 3.2 3b instruct model | -+-----------------------------+----------------------------------+ -| Context Length | 128K tokens | -+-----------------------------+----------------------------------+ -| Weights format | bf16 | -+-----------------------------+----------------------------------+ -| Model params.json | { | -| | "dim": 3072, | -| | "n_layers": 28, | -| | "n_heads": 24, | -| | "n_kv_heads": 8, | -| | "vocab_size": 128256, | -| | "ffn_dim_multiplier": 1.0, | -| | "multiple_of": 256, | -| | "norm_eps": 1e-05, | -| | "rope_theta": 500000.0, | -| | "use_scaled_rope": true | -| | } | -+-----------------------------+----------------------------------+ -| Recommended sampling params | { | -| | "temperature": 1.0, | -| | "top_p": 0.9, | -| | "top_k": 0 | -| | } | -+-----------------------------+----------------------------------+ -``` - -### Prompt Format -You can even run `llama model prompt-format` see all of the templates and their tokens: - -``` -llama model prompt-format -m Llama3.2-3B-Instruct -``` -![alt text](/img/prompt-format.png) - - -You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios. - -**NOTE**: Outputs in terminal are color printed to show special tokens. - -### Remove model -You can run `llama model remove` to remove an unnecessary model: - -``` -llama model remove -m Llama-Guard-3-8B-int8 +To list the downloaded models, you can use the Hugging Face CLI: +```bash +# List all downloaded models in your local cache +huggingface-cli scan-cache ``` diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb index bca505b5e..0ec9aa0e6 100644 --- a/docs/getting_started_llama4.ipynb +++ b/docs/getting_started_llama4.ipynb @@ -51,11 +51,11 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install uv\n", + "!pip install uv \"huggingface_hub[cli]\"\n", "\n", "MODEL=\"Llama-4-Scout-17B-16E-Instruct\"\n", "# get meta url from llama.com\n", - "!uv run --with llama-stack llama model download --source meta --model-id $MODEL --meta-url \n", + "huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL\n", "\n", "model_id = f\"meta-llama/{MODEL}\"" ] diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py deleted file mode 100644 index 70cb9f4db..000000000 --- a/llama_stack/cli/download.py +++ /dev/null @@ -1,495 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import argparse -import asyncio -import json -import os -import shutil -import sys -from dataclasses import dataclass -from datetime import UTC, datetime -from functools import partial -from pathlib import Path - -import httpx -from pydantic import BaseModel, ConfigDict -from rich.console import Console -from rich.progress import ( - BarColumn, - DownloadColumn, - Progress, - TextColumn, - TimeRemainingColumn, - TransferSpeedColumn, -) -from termcolor import cprint - -from llama_stack.cli.subcommand import Subcommand -from llama_stack.models.llama.sku_list import LlamaDownloadInfo -from llama_stack.models.llama.sku_types import Model - - -class Download(Subcommand): - """Llama cli for downloading llama toolchain assets""" - - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "download", - prog="llama download", - description="Download a model from llama.meta.com or Hugging Face Hub", - formatter_class=argparse.RawTextHelpFormatter, - ) - setup_download_parser(self.parser) - - -def setup_download_parser(parser: argparse.ArgumentParser) -> None: - parser.add_argument( - "--source", - choices=["meta", "huggingface"], - default="meta", - ) - parser.add_argument( - "--model-id", - required=False, - help="See `llama model list` or `llama model list --show-all` for the list of available models. Specify multiple model IDs with commas, e.g. --model-id Llama3.2-1B,Llama3.2-3B", - ) - parser.add_argument( - "--hf-token", - type=str, - required=False, - default=None, - help="Hugging Face API token. Needed for gated models like llama2/3. Will also try to read environment variable `HF_TOKEN` as default.", - ) - parser.add_argument( - "--meta-url", - type=str, - required=False, - help="For source=meta, URL obtained from llama.meta.com after accepting license terms", - ) - parser.add_argument( - "--max-parallel", - type=int, - required=False, - default=3, - help="Maximum number of concurrent downloads", - ) - parser.add_argument( - "--ignore-patterns", - type=str, - required=False, - default="*.safetensors", - help="""For source=huggingface, files matching any of the patterns are not downloaded. Defaults to ignoring -safetensors files to avoid downloading duplicate weights. 
-""", - ) - parser.add_argument( - "--manifest-file", - type=str, - help="For source=meta, you can download models from a manifest file containing a file => URL mapping", - required=False, - ) - parser.set_defaults(func=partial(run_download_cmd, parser=parser)) - - -@dataclass -class DownloadTask: - url: str - output_file: str - total_size: int = 0 - downloaded_size: int = 0 - task_id: int | None = None - retries: int = 0 - max_retries: int = 3 - - -class DownloadError(Exception): - pass - - -class CustomTransferSpeedColumn(TransferSpeedColumn): - def render(self, task): - if task.finished: - return "-" - return super().render(task) - - -class ParallelDownloader: - def __init__( - self, - max_concurrent_downloads: int = 3, - buffer_size: int = 1024 * 1024, - timeout: int = 30, - ): - self.max_concurrent_downloads = max_concurrent_downloads - self.buffer_size = buffer_size - self.timeout = timeout - self.console = Console() - self.progress = Progress( - TextColumn("[bold blue]{task.description}"), - BarColumn(bar_width=40), - "[progress.percentage]{task.percentage:>3.1f}%", - DownloadColumn(), - CustomTransferSpeedColumn(), - TimeRemainingColumn(), - console=self.console, - expand=True, - ) - self.client_options = { - "timeout": httpx.Timeout(timeout), - "follow_redirects": True, - } - - async def retry_with_exponential_backoff(self, task: DownloadTask, func, *args, **kwargs): - last_exception = None - for attempt in range(task.max_retries): - try: - return await func(*args, **kwargs) - except Exception as e: - last_exception = e - if attempt < task.max_retries - 1: - wait_time = min(30, 2**attempt) # Cap at 30 seconds - self.console.print( - f"[yellow]Attempt {attempt + 1}/{task.max_retries} failed, " - f"retrying in {wait_time} seconds: {str(e)}[/yellow]" - ) - await asyncio.sleep(wait_time) - continue - raise last_exception - - async def get_file_info(self, client: httpx.AsyncClient, task: DownloadTask) -> None: - if task.total_size > 0: - self.progress.update(task.task_id, total=task.total_size) - return - - async def _get_info(): - response = await client.head(task.url, headers={"Accept-Encoding": "identity"}, **self.client_options) - response.raise_for_status() - return response - - try: - response = await self.retry_with_exponential_backoff(task, _get_info) - - task.url = str(response.url) - task.total_size = int(response.headers.get("Content-Length", 0)) - - if task.total_size == 0: - raise DownloadError( - f"Unable to determine file size for {task.output_file}. " - "The server might not support range requests." 
- ) - - # Update the progress bar's total size once we know it - if task.task_id is not None: - self.progress.update(task.task_id, total=task.total_size) - - except httpx.HTTPError as e: - self.console.print(f"[red]Error getting file info: {str(e)}[/red]") - raise - - def verify_file_integrity(self, task: DownloadTask) -> bool: - if not os.path.exists(task.output_file): - return False - return os.path.getsize(task.output_file) == task.total_size - - async def download_chunk(self, client: httpx.AsyncClient, task: DownloadTask, start: int, end: int) -> None: - async def _download_chunk(): - headers = {"Range": f"bytes={start}-{end}"} - async with client.stream("GET", task.url, headers=headers, **self.client_options) as response: - response.raise_for_status() - - with open(task.output_file, "ab") as file: - file.seek(start) - async for chunk in response.aiter_bytes(self.buffer_size): - file.write(chunk) - task.downloaded_size += len(chunk) - self.progress.update( - task.task_id, - completed=task.downloaded_size, - ) - - try: - await self.retry_with_exponential_backoff(task, _download_chunk) - except Exception as e: - raise DownloadError( - f"Failed to download chunk {start}-{end} after {task.max_retries} attempts: {str(e)}" - ) from e - - async def prepare_download(self, task: DownloadTask) -> None: - output_dir = os.path.dirname(task.output_file) - os.makedirs(output_dir, exist_ok=True) - - if os.path.exists(task.output_file): - task.downloaded_size = os.path.getsize(task.output_file) - - async def download_file(self, task: DownloadTask) -> None: - try: - async with httpx.AsyncClient(**self.client_options) as client: - await self.get_file_info(client, task) - - # Check if file is already downloaded - if os.path.exists(task.output_file): - if self.verify_file_integrity(task): - self.console.print(f"[green]Already downloaded {task.output_file}[/green]") - self.progress.update(task.task_id, completed=task.total_size) - return - - await self.prepare_download(task) - - try: - # Split the remaining download into chunks - chunk_size = 27_000_000_000 # Cloudfront max chunk size - chunks = [] - - current_pos = task.downloaded_size - while current_pos < task.total_size: - chunk_end = min(current_pos + chunk_size - 1, task.total_size - 1) - chunks.append((current_pos, chunk_end)) - current_pos = chunk_end + 1 - - # Download chunks in sequence - for chunk_start, chunk_end in chunks: - await self.download_chunk(client, task, chunk_start, chunk_end) - - except Exception as e: - raise DownloadError(f"Download failed: {str(e)}") from e - - except Exception as e: - self.progress.update(task.task_id, description=f"[red]Failed: {task.output_file}[/red]") - raise DownloadError(f"Download failed for {task.output_file}: {str(e)}") from e - - def has_disk_space(self, tasks: list[DownloadTask]) -> bool: - try: - total_remaining_size = sum(task.total_size - task.downloaded_size for task in tasks) - dir_path = os.path.dirname(os.path.abspath(tasks[0].output_file)) - free_space = shutil.disk_usage(dir_path).free - - # Add 10% buffer for safety - required_space = int(total_remaining_size * 1.1) - - if free_space < required_space: - self.console.print( - f"[red]Not enough disk space. 
Required: {required_space // (1024 * 1024)} MB, " - f"Available: {free_space // (1024 * 1024)} MB[/red]" - ) - return False - return True - - except Exception as e: - raise DownloadError(f"Failed to check disk space: {str(e)}") from e - - async def download_all(self, tasks: list[DownloadTask]) -> None: - if not tasks: - raise ValueError("No download tasks provided") - - if not os.environ.get("LLAMA_DOWNLOAD_NO_SPACE_CHECK") and not self.has_disk_space(tasks): - raise DownloadError("Insufficient disk space for downloads") - - failed_tasks = [] - - with self.progress: - for task in tasks: - desc = f"Downloading {Path(task.output_file).name}" - task.task_id = self.progress.add_task(desc, total=task.total_size, completed=task.downloaded_size) - - semaphore = asyncio.Semaphore(self.max_concurrent_downloads) - - async def download_with_semaphore(task: DownloadTask): - async with semaphore: - try: - await self.download_file(task) - except Exception as e: - failed_tasks.append((task, str(e))) - - await asyncio.gather(*(download_with_semaphore(task) for task in tasks)) - - if failed_tasks: - self.console.print("\n[red]Some downloads failed:[/red]") - for task, error in failed_tasks: - self.console.print(f"[red]- {Path(task.output_file).name}: {error}[/red]") - raise DownloadError(f"{len(failed_tasks)} downloads failed") - - -def _hf_download( - model: "Model", - hf_token: str, - ignore_patterns: str, - parser: argparse.ArgumentParser, -): - from huggingface_hub import snapshot_download - from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError - - from llama_stack.core.utils.model_utils import model_local_dir - - repo_id = model.huggingface_repo - if repo_id is None: - raise ValueError(f"No repo id found for model {model.descriptor()}") - - output_dir = model_local_dir(model.descriptor()) - os.makedirs(output_dir, exist_ok=True) - try: - true_output_dir = snapshot_download( - repo_id, - local_dir=output_dir, - ignore_patterns=ignore_patterns, - token=hf_token, - library_name="llama-stack", - ) - except GatedRepoError: - parser.error( - "It looks like you are trying to access a gated repository. Please ensure you " - "have access to the repository and have provided the proper Hugging Face API token " - "using the option `--hf-token` or by running `huggingface-cli login`." 
- "You can find your token by visiting https://huggingface.co/settings/tokens" - ) - except RepositoryNotFoundError: - parser.error(f"Repository '{repo_id}' not found on the Hugging Face Hub or incorrect Hugging Face token.") - except Exception as e: - parser.error(e) - - print(f"\nSuccessfully downloaded model to {true_output_dir}") - - -def _meta_download( - model: "Model", - model_id: str, - meta_url: str, - info: "LlamaDownloadInfo", - max_concurrent_downloads: int, -): - from llama_stack.core.utils.model_utils import model_local_dir - - output_dir = Path(model_local_dir(model.descriptor())) - os.makedirs(output_dir, exist_ok=True) - - # Create download tasks for each file - tasks = [] - for f in info.files: - output_file = str(output_dir / f) - url = meta_url.replace("*", f"{info.folder}/{f}") - total_size = info.pth_size if "consolidated" in f else 0 - tasks.append(DownloadTask(url=url, output_file=output_file, total_size=total_size, max_retries=3)) - - # Initialize and run parallel downloader - downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads) - asyncio.run(downloader.download_all(tasks)) - - cprint(f"\nSuccessfully downloaded model to {output_dir}", color="green", file=sys.stderr) - cprint( - f"\nView MD5 checksum files at: {output_dir / 'checklist.chk'}", - file=sys.stderr, - ) - cprint( - f"\n[Optionally] To run MD5 checksums, use the following command: llama model verify-download --model-id {model_id}", - color="yellow", - file=sys.stderr, - ) - - -class ModelEntry(BaseModel): - model_id: str - files: dict[str, str] - - model_config = ConfigDict(protected_namespaces=()) - - -class Manifest(BaseModel): - models: list[ModelEntry] - expires_on: datetime - - -def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int): - from llama_stack.core.utils.model_utils import model_local_dir - - with open(manifest_file) as f: - d = json.load(f) - manifest = Manifest(**d) - - if datetime.now(UTC) > manifest.expires_on.astimezone(UTC): - raise ValueError(f"Manifest URLs have expired on {manifest.expires_on}") - - console = Console() - for entry in manifest.models: - console.print(f"[blue]Downloading model {entry.model_id}...[/blue]") - output_dir = Path(model_local_dir(entry.model_id)) - os.makedirs(output_dir, exist_ok=True) - - if any(output_dir.iterdir()): - console.print(f"[yellow]Output directory {output_dir} is not empty.[/yellow]") - - while True: - resp = input("Do you want to (C)ontinue download or (R)estart completely? (continue/restart): ") - if resp.lower() in ["restart", "r"]: - shutil.rmtree(output_dir) - os.makedirs(output_dir, exist_ok=True) - break - elif resp.lower() in ["continue", "c"]: - console.print("[blue]Continuing download...[/blue]") - break - else: - console.print("[red]Invalid response. 
Please try again.[/red]") - - # Create download tasks for all files in the manifest - tasks = [ - DownloadTask(url=url, output_file=str(output_dir / fname), max_retries=3) - for fname, url in entry.files.items() - ] - - # Initialize and run parallel downloader - downloader = ParallelDownloader(max_concurrent_downloads=max_concurrent_downloads) - asyncio.run(downloader.download_all(tasks)) - - -def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser): - """Main download command handler""" - try: - if args.manifest_file: - _download_from_manifest(args.manifest_file, args.max_parallel) - return - - if args.model_id is None: - parser.error("Please provide a model id") - return - - # Handle comma-separated model IDs - model_ids = [model_id.strip() for model_id in args.model_id.split(",")] - - from llama_stack.models.llama.sku_list import llama_meta_net_info, resolve_model - - from .model.safety_models import ( - prompt_guard_download_info_map, - prompt_guard_model_sku_map, - ) - - prompt_guard_model_sku_map = prompt_guard_model_sku_map() - prompt_guard_download_info_map = prompt_guard_download_info_map() - - for model_id in model_ids: - if model_id in prompt_guard_model_sku_map.keys(): - model = prompt_guard_model_sku_map[model_id] - info = prompt_guard_download_info_map[model_id] - else: - model = resolve_model(model_id) - if model is None: - parser.error(f"Model {model_id} not found") - continue - info = llama_meta_net_info(model) - - if args.source == "huggingface": - _hf_download(model, args.hf_token, args.ignore_patterns, parser) - else: - meta_url = args.meta_url or input( - f"Please provide the signed URL for model {model_id} you received via email " - f"after visiting https://www.llama.com/llama-downloads/ " - f"(e.g., https://llama3-1.llamameta.net/*?Policy...): " - ) - if "llamameta.net" not in meta_url: - parser.error("Invalid Meta URL provided") - _meta_download(model, model_id, meta_url, info, args.max_parallel) - - except Exception as e: - parser.error(f"Download failed: {str(e)}") diff --git a/llama_stack/cli/llama.py b/llama_stack/cli/llama.py index 433b311e7..5ff15d8d7 100644 --- a/llama_stack/cli/llama.py +++ b/llama_stack/cli/llama.py @@ -6,11 +6,8 @@ import argparse -from .download import Download -from .model import ModelParser from .stack import StackParser from .stack.utils import print_subcommand_description -from .verify_download import VerifyDownload class LlamaCLIParser: @@ -30,10 +27,7 @@ class LlamaCLIParser: subparsers = self.parser.add_subparsers(title="subcommands") # Add sub-commands - ModelParser.create(subparsers) StackParser.create(subparsers) - Download.create(subparsers) - VerifyDownload.create(subparsers) print_subcommand_description(self.parser, subparsers) diff --git a/llama_stack/cli/model/__init__.py b/llama_stack/cli/model/__init__.py deleted file mode 100644 index db70364a9..000000000 --- a/llama_stack/cli/model/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .model import ModelParser # noqa diff --git a/llama_stack/cli/model/describe.py b/llama_stack/cli/model/describe.py deleted file mode 100644 index 26b0da686..000000000 --- a/llama_stack/cli/model/describe.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import argparse -import json - -from llama_stack.cli.subcommand import Subcommand -from llama_stack.cli.table import print_table -from llama_stack.models.llama.sku_list import resolve_model - - -class ModelDescribe(Subcommand): - """Show details about a model""" - - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "describe", - prog="llama model describe", - description="Show details about a llama model", - formatter_class=argparse.RawTextHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._run_model_describe_cmd) - - def _add_arguments(self): - self.parser.add_argument( - "-m", - "--model-id", - type=str, - required=True, - help="See `llama model list` or `llama model list --show-all` for the list of available models", - ) - - def _run_model_describe_cmd(self, args: argparse.Namespace) -> None: - from .safety_models import prompt_guard_model_sku_map - - prompt_guard_model_map = prompt_guard_model_sku_map() - if args.model_id in prompt_guard_model_map.keys(): - model = prompt_guard_model_map[args.model_id] - else: - model = resolve_model(args.model_id) - - if model is None: - self.parser.error( - f"Model {args.model_id} not found; try 'llama model list' for a list of available models." - ) - return - - headers = [ - "Model", - model.descriptor(), - ] - - rows = [ - ("Hugging Face ID", model.huggingface_repo or ""), - ("Description", model.description), - ("Context Length", f"{model.max_seq_length // 1024}K tokens"), - ("Weights format", model.quantization_format.value), - ("Model params.json", json.dumps(model.arch_args, indent=4)), - ] - - print_table( - rows, - headers, - separate_rows=True, - ) diff --git a/llama_stack/cli/model/download.py b/llama_stack/cli/model/download.py deleted file mode 100644 index a3b8f7796..000000000 --- a/llama_stack/cli/model/download.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import argparse - -from llama_stack.cli.subcommand import Subcommand - - -class ModelDownload(Subcommand): - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "download", - prog="llama model download", - description="Download a model from llama.meta.com or Hugging Face Hub", - formatter_class=argparse.RawTextHelpFormatter, - ) - - from llama_stack.cli.download import setup_download_parser - - setup_download_parser(self.parser) diff --git a/llama_stack/cli/model/list.py b/llama_stack/cli/model/list.py deleted file mode 100644 index f46a8c88d..000000000 --- a/llama_stack/cli/model/list.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import argparse -import os -import time -from pathlib import Path - -from llama_stack.cli.subcommand import Subcommand -from llama_stack.cli.table import print_table -from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR -from llama_stack.models.llama.sku_list import all_registered_models - - -def _get_model_size(model_dir): - return sum(f.stat().st_size for f in Path(model_dir).rglob("*") if f.is_file()) - - -def _convert_to_model_descriptor(model): - for m in all_registered_models(): - if model == m.descriptor().replace(":", "-"): - return str(m.descriptor()) - return str(model) - - -def _run_model_list_downloaded_cmd() -> None: - headers = ["Model", "Size", "Modified Time"] - - rows = [] - for model in os.listdir(DEFAULT_CHECKPOINT_DIR): - abs_path = os.path.join(DEFAULT_CHECKPOINT_DIR, model) - space_usage = _get_model_size(abs_path) - model_size = f"{space_usage / (1024**3):.2f} GB" - modified_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(os.path.getmtime(abs_path))) - rows.append( - [ - _convert_to_model_descriptor(model), - model_size, - modified_time, - ] - ) - - print_table( - rows, - headers, - separate_rows=True, - ) - - -class ModelList(Subcommand): - """List available llama models""" - - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "list", - prog="llama model list", - description="Show available llama models", - formatter_class=argparse.RawTextHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._run_model_list_cmd) - - def _add_arguments(self): - self.parser.add_argument( - "--show-all", - action="store_true", - help="Show all models (not just defaults)", - ) - self.parser.add_argument( - "--downloaded", - action="store_true", - help="List the downloaded models", - ) - self.parser.add_argument( - "-s", - "--search", - type=str, - required=False, - help="Search for the input string as a substring in the model descriptor(ID)", - ) - - def _run_model_list_cmd(self, args: argparse.Namespace) -> None: - from .safety_models import prompt_guard_model_skus - - if args.downloaded: - return _run_model_list_downloaded_cmd() - - headers = [ - "Model Descriptor(ID)", - "Hugging Face Repo", - "Context Length", - ] - - rows = [] - for model in all_registered_models() + prompt_guard_model_skus(): - if not args.show_all and not model.is_featured: - continue - - descriptor = model.descriptor() - if not args.search or args.search.lower() in descriptor.lower(): - rows.append( - [ - descriptor, - model.huggingface_repo, - f"{model.max_seq_length // 1024}K", - ] - ) - if len(rows) == 0: - print(f"Did not find any model matching `{args.search}`.") - else: - print_table( - rows, - headers, - separate_rows=True, - ) diff --git a/llama_stack/cli/model/model.py b/llama_stack/cli/model/model.py deleted file mode 100644 index 808029945..000000000 --- a/llama_stack/cli/model/model.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import argparse - -from llama_stack.cli.model.describe import ModelDescribe -from llama_stack.cli.model.download import ModelDownload -from llama_stack.cli.model.list import ModelList -from llama_stack.cli.model.prompt_format import ModelPromptFormat -from llama_stack.cli.model.remove import ModelRemove -from llama_stack.cli.model.verify_download import ModelVerifyDownload -from llama_stack.cli.stack.utils import print_subcommand_description -from llama_stack.cli.subcommand import Subcommand - - -class ModelParser(Subcommand): - """Llama cli for model interface apis""" - - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "model", - prog="llama model", - description="Work with llama models", - formatter_class=argparse.RawTextHelpFormatter, - ) - - self.parser.set_defaults(func=lambda args: self.parser.print_help()) - - subparsers = self.parser.add_subparsers(title="model_subcommands") - - # Add sub-commands - ModelDownload.create(subparsers) - ModelList.create(subparsers) - ModelPromptFormat.create(subparsers) - ModelDescribe.create(subparsers) - ModelVerifyDownload.create(subparsers) - ModelRemove.create(subparsers) - - print_subcommand_description(self.parser, subparsers) diff --git a/llama_stack/cli/model/prompt_format.py b/llama_stack/cli/model/prompt_format.py deleted file mode 100644 index 673487812..000000000 --- a/llama_stack/cli/model/prompt_format.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import argparse -import textwrap -from io import StringIO -from pathlib import Path - -from llama_stack.cli.subcommand import Subcommand -from llama_stack.cli.table import print_table -from llama_stack.models.llama.sku_types import CoreModelId, ModelFamily, is_multimodal, model_family - -ROOT_DIR = Path(__file__).parent.parent.parent - - -class ModelPromptFormat(Subcommand): - """Llama model cli for describe a model prompt format (message formats)""" - - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "prompt-format", - prog="llama model prompt-format", - description="Show llama model message formats", - epilog=textwrap.dedent( - """ - Example: - llama model prompt-format - """ - ), - formatter_class=argparse.RawTextHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._run_model_template_cmd) - - def _add_arguments(self): - self.parser.add_argument( - "-m", - "--model-name", - type=str, - help="Example: Llama3.1-8B or Llama3.2-11B-Vision, etc\n" - "(Run `llama model list` to see a list of valid model names)", - ) - self.parser.add_argument( - "-l", - "--list", - action="store_true", - help="List all available models", - ) - - def _run_model_template_cmd(self, args: argparse.Namespace) -> None: - import importlib.resources - - # Only Llama 3.1 and 3.2 are supported - supported_model_ids = [ - m for m in CoreModelId if model_family(m) in {ModelFamily.llama3_1, ModelFamily.llama3_2} - ] - - model_list = [m.value for m in supported_model_ids] - - if args.list: - headers = ["Model(s)"] - rows = [] - for m in model_list: - rows.append( - [ - m, - ] - ) - print_table( - rows, - headers, - separate_rows=True, - ) - return - - try: - model_id = CoreModelId(args.model_name) - except ValueError: - self.parser.error( - 
f"{args.model_name} is not a valid Model. Choose one from the list of valid models. " - f"Run `llama model list` to see the valid model names." - ) - - if model_id not in supported_model_ids: - self.parser.error( - f"{model_id} is not a valid Model. Choose one from the list of valid models. " - f"Run `llama model list` to see the valid model names." - ) - - llama_3_1_file = ROOT_DIR / "models" / "llama" / "llama3_1" / "prompt_format.md" - llama_3_2_text_file = ROOT_DIR / "models" / "llama" / "llama3_2" / "text_prompt_format.md" - llama_3_2_vision_file = ROOT_DIR / "models" / "llama" / "llama3_2" / "vision_prompt_format.md" - if model_family(model_id) == ModelFamily.llama3_1: - with importlib.resources.as_file(llama_3_1_file) as f: - content = f.open("r").read() - elif model_family(model_id) == ModelFamily.llama3_2: - if is_multimodal(model_id): - with importlib.resources.as_file(llama_3_2_vision_file) as f: - content = f.open("r").read() - else: - with importlib.resources.as_file(llama_3_2_text_file) as f: - content = f.open("r").read() - - render_markdown_to_pager(content) - - -def render_markdown_to_pager(markdown_content: str): - from rich.console import Console - from rich.markdown import Markdown - from rich.style import Style - from rich.text import Text - - class LeftAlignedHeaderMarkdown(Markdown): - def parse_header(self, token): - level = token.type.count("h") - content = Text(token.content) - header_style = Style(color="bright_blue", bold=True) - header = Text(f"{'#' * level} ", style=header_style) + content - self.add_text(header) - - # Render the Markdown - md = LeftAlignedHeaderMarkdown(markdown_content) - - # Capture the rendered output - output = StringIO() - console = Console(file=output, force_terminal=True, width=100) # Set a fixed width - console.print(md) - rendered_content = output.getvalue() - print(rendered_content) diff --git a/llama_stack/cli/model/remove.py b/llama_stack/cli/model/remove.py deleted file mode 100644 index 138e06a2a..000000000 --- a/llama_stack/cli/model/remove.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import argparse -import os -import shutil - -from llama_stack.cli.subcommand import Subcommand -from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR -from llama_stack.models.llama.sku_list import resolve_model - - -class ModelRemove(Subcommand): - """Remove the downloaded llama model""" - - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "remove", - prog="llama model remove", - description="Remove the downloaded llama model", - formatter_class=argparse.RawTextHelpFormatter, - ) - self._add_arguments() - self.parser.set_defaults(func=self._run_model_remove_cmd) - - def _add_arguments(self): - self.parser.add_argument( - "-m", - "--model", - required=True, - help="Specify the llama downloaded model name, see `llama model list --downloaded`", - ) - self.parser.add_argument( - "-f", - "--force", - action="store_true", - help="Used to forcefully remove the llama model from the storage without further confirmation", - ) - - def _run_model_remove_cmd(self, args: argparse.Namespace) -> None: - from .safety_models import prompt_guard_model_sku_map - - prompt_guard_model_map = prompt_guard_model_sku_map() - - if args.model in prompt_guard_model_map.keys(): - model = prompt_guard_model_map[args.model] - else: - model = resolve_model(args.model) - - model_path = os.path.join(DEFAULT_CHECKPOINT_DIR, args.model.replace(":", "-")) - - if model is None or not os.path.isdir(model_path): - print(f"'{args.model}' is not a valid llama model or does not exist.") - return - - if args.force: - shutil.rmtree(model_path) - print(f"{args.model} removed.") - else: - if input(f"Are you sure you want to remove {args.model}? (y/n): ").strip().lower() == "y": - shutil.rmtree(model_path) - print(f"{args.model} removed.") - else: - print("Removal aborted.") diff --git a/llama_stack/cli/model/safety_models.py b/llama_stack/cli/model/safety_models.py deleted file mode 100644 index e31767f13..000000000 --- a/llama_stack/cli/model/safety_models.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any - -from pydantic import BaseModel, ConfigDict, Field - -from llama_stack.models.llama.sku_list import LlamaDownloadInfo -from llama_stack.models.llama.sku_types import CheckpointQuantizationFormat - - -class PromptGuardModel(BaseModel): - """Make a 'fake' Model-like object for Prompt Guard. Eventually this will be removed.""" - - model_id: str - huggingface_repo: str - description: str = "Prompt Guard. NOTE: this model will not be provided via `llama` CLI soon." 
- is_featured: bool = False - max_seq_length: int = 512 - is_instruct_model: bool = False - quantization_format: CheckpointQuantizationFormat = CheckpointQuantizationFormat.bf16 - arch_args: dict[str, Any] = Field(default_factory=dict) - - def descriptor(self) -> str: - return self.model_id - - model_config = ConfigDict(protected_namespaces=()) - - -def prompt_guard_model_skus(): - return [ - PromptGuardModel(model_id="Prompt-Guard-86M", huggingface_repo="meta-llama/Prompt-Guard-86M"), - PromptGuardModel( - model_id="Llama-Prompt-Guard-2-86M", - huggingface_repo="meta-llama/Llama-Prompt-Guard-2-86M", - ), - PromptGuardModel( - model_id="Llama-Prompt-Guard-2-22M", - huggingface_repo="meta-llama/Llama-Prompt-Guard-2-22M", - ), - ] - - -def prompt_guard_model_sku_map() -> dict[str, Any]: - return {model.model_id: model for model in prompt_guard_model_skus()} - - -def prompt_guard_download_info_map() -> dict[str, LlamaDownloadInfo]: - return { - model.model_id: LlamaDownloadInfo( - folder="Prompt-Guard" if model.model_id == "Prompt-Guard-86M" else model.model_id, - files=[ - "model.safetensors", - "special_tokens_map.json", - "tokenizer.json", - "tokenizer_config.json", - ], - pth_size=1, - ) - for model in prompt_guard_model_skus() - } diff --git a/llama_stack/cli/model/verify_download.py b/llama_stack/cli/model/verify_download.py deleted file mode 100644 index e7159c0aa..000000000 --- a/llama_stack/cli/model/verify_download.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import argparse - -from llama_stack.cli.subcommand import Subcommand - - -class ModelVerifyDownload(Subcommand): - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "verify-download", - prog="llama model verify-download", - description="Verify the downloaded checkpoints' checksums for models downloaded from Meta", - formatter_class=argparse.RawTextHelpFormatter, - ) - - from llama_stack.cli.verify_download import setup_verify_download_parser - - setup_verify_download_parser(self.parser) diff --git a/llama_stack/cli/verify_download.py b/llama_stack/cli/verify_download.py deleted file mode 100644 index e738abb4f..000000000 --- a/llama_stack/cli/verify_download.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import argparse -import hashlib -from dataclasses import dataclass -from functools import partial -from pathlib import Path - -from rich.console import Console -from rich.progress import Progress, SpinnerColumn, TextColumn - -from llama_stack.cli.subcommand import Subcommand - - -@dataclass -class VerificationResult: - filename: str - expected_hash: str - actual_hash: str | None - exists: bool - matches: bool - - -class VerifyDownload(Subcommand): - """Llama cli for verifying downloaded model files""" - - def __init__(self, subparsers: argparse._SubParsersAction): - super().__init__() - self.parser = subparsers.add_parser( - "verify-download", - prog="llama verify-download", - description="Verify integrity of downloaded model files", - formatter_class=argparse.RawTextHelpFormatter, - ) - setup_verify_download_parser(self.parser) - - -def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None: - parser.add_argument( - "--model-id", - required=True, - help="Model ID to verify (only for models downloaded from Meta)", - ) - parser.set_defaults(func=partial(run_verify_cmd, parser=parser)) - - -def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str: - sha256_hash = hashlib.sha256() - with open(filepath, "rb") as f: - for chunk in iter(lambda: f.read(chunk_size), b""): - sha256_hash.update(chunk) - return sha256_hash.hexdigest() - - -def load_checksums(checklist_path: Path) -> dict[str, str]: - checksums = {} - with open(checklist_path) as f: - for line in f: - if line.strip(): - sha256sum, filepath = line.strip().split(" ", 1) - # Remove leading './' if present - filepath = filepath.lstrip("./") - checksums[filepath] = sha256sum - return checksums - - -def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -> list[VerificationResult]: - results = [] - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - console=console, - ) as progress: - for filepath, expected_hash in checksums.items(): - full_path = model_dir / filepath - task_id = progress.add_task(f"Verifying {filepath}...", total=None) - - exists = full_path.exists() - actual_hash = None - matches = False - - if exists: - actual_hash = calculate_sha256(full_path) - matches = actual_hash == expected_hash - - results.append( - VerificationResult( - filename=filepath, - expected_hash=expected_hash, - actual_hash=actual_hash, - exists=exists, - matches=matches, - ) - ) - - progress.remove_task(task_id) - - return results - - -def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser): - from llama_stack.core.utils.model_utils import model_local_dir - - console = Console() - model_dir = Path(model_local_dir(args.model_id)) - checklist_path = model_dir / "checklist.chk" - - if not model_dir.exists(): - parser.error(f"Model directory not found: {model_dir}") - - if not checklist_path.exists(): - parser.error(f"Checklist file not found: {checklist_path}") - - checksums = load_checksums(checklist_path) - results = verify_files(model_dir, checksums, console) - - # Print results - console.print("\nVerification Results:") - - all_good = True - for result in results: - if not result.exists: - console.print(f"[red]❌ {result.filename}: File not found[/red]") - all_good = False - elif not result.matches: - console.print( - f"[red]❌ {result.filename}: Hash mismatch[/red]\n" - f" Expected: {result.expected_hash}\n" - f" Got: {result.actual_hash}" - ) - all_good = False - else: - console.print(f"[green]✓ {result.filename}: Verified[/green]") - 
-
-    if all_good:
-        console.print("\n[green]All files verified successfully![/green]")
diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/llama_stack/distributions/meta-reference-gpu/doc_template.md
index 92dcc6102..a7e8c2d67 100644
--- a/llama_stack/distributions/meta-reference-gpu/doc_template.md
+++ b/llama_stack/distributions/meta-reference-gpu/doc_template.md
@@ -29,31 +29,12 @@ The following environment variables can be configured:

 ## Prerequisite: Downloading Models

-Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](../../references/llama_cli_reference/download_models.md) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
-
-```
-$ llama model list --downloaded
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
-┃ Model                                   ┃ Size     ┃ Modified Time       ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
-│ Llama3.2-1B-Instruct:int4-qlora-eo8     │ 1.53 GB  │ 2025-02-26 11:22:28 │
-├─────────────────────────────────────────┼──────────┼─────────────────────┤
-│ Llama3.2-1B                             │ 2.31 GB  │ 2025-02-18 21:48:52 │
-├─────────────────────────────────────────┼──────────┼─────────────────────┤
-│ Prompt-Guard-86M                        │ 0.02 GB  │ 2025-02-26 11:29:28 │
-├─────────────────────────────────────────┼──────────┼─────────────────────┤
-│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB  │ 2025-02-26 11:37:41 │
-├─────────────────────────────────────────┼──────────┼─────────────────────┤
-│ Llama3.2-3B                             │ 5.99 GB  │ 2025-02-18 21:51:26 │
-├─────────────────────────────────────────┼──────────┼─────────────────────┤
-│ Llama3.1-8B                             │ 14.97 GB │ 2025-02-16 10:36:37 │
-├─────────────────────────────────────────┼──────────┼─────────────────────┤
-│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB  │ 2025-02-26 11:35:02 │
-├─────────────────────────────────────────┼──────────┼─────────────────────┤
-│ Llama-Guard-3-1B                        │ 2.80 GB  │ 2025-02-26 11:20:46 │
-├─────────────────────────────────────────┼──────────┼─────────────────────┤
-│ Llama-Guard-3-1B:int4                   │ 0.43 GB  │ 2025-02-26 11:33:33 │
-└─────────────────────────────────────────┴──────────┴─────────────────────┘
+Please check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) for instructions on downloading models with the Hugging Face CLI.
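+
+For example, a checkpoint can be fetched into `~/.llama` with the Hugging Face CLI (the model ID below is only an illustration; substitute the model you plan to serve):
+
+```
+huggingface-cli download meta-llama/Llama-3.2-3B-Instruct --local-dir ~/.llama/Llama-3.2-3B-Instruct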
``` ## Running the Distribution diff --git a/pyproject.toml b/pyproject.toml index df441e317..81997c249 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,14 +25,13 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", # server - "fire", # for MCP in LLS client + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", - "huggingface-hub>=0.34.0,<1.0", "jinja2>=3.1.6", "jsonschema", "llama-stack-client>=0.2.23", - "openai>=1.107", # for expires_after support + "openai>=1.107", # for expires_after support "prompt-toolkit", "python-dotenv", "python-jose[cryptography]", @@ -43,13 +42,13 @@ dependencies = [ "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", # For fastapi Form - "uvicorn>=0.34.0", # server - "opentelemetry-sdk>=1.30.0", # server + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk>=1.30.0", # server "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server - "aiosqlite>=0.21.0", # server - for metadata store - "asyncpg", # for metadata store - "sqlalchemy[asyncio]>=2.0.41", # server - for conversations + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store + "sqlalchemy[asyncio]>=2.0.41", # server - for conversations ] [project.optional-dependencies] @@ -68,14 +67,14 @@ dev = [ "pytest-cov", "pytest-html", "pytest-json-report", - "pytest-socket", # For blocking network access in unit tests - "nbval", # For notebook testing + "pytest-socket", # For blocking network access in unit tests + "nbval", # For notebook testing "black", "ruff", "types-requests", "types-setuptools", "pre-commit", - "ruamel.yaml", # needed for openapi generator + "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. unit = [ @@ -141,9 +140,7 @@ docs = [ "requests", ] codegen = ["rich", "pydantic>=2.11.9", "jinja2>=3.1.6"] -benchmark = [ - "locust>=2.39.1", -] +benchmark = ["locust>=2.39.1"] [project.urls] Homepage = "https://github.com/llamastack/llama-stack" @@ -242,7 +239,6 @@ follow_imports = "silent" # to exclude the entire directory. 
exclude = [ # As we fix more and more of these, we should remove them from the list - "^llama_stack/cli/download\\.py$", "^llama_stack.core/build\\.py$", "^llama_stack.core/client\\.py$", "^llama_stack.core/request_headers\\.py$", @@ -332,6 +328,4 @@ classmethod-decorators = ["classmethod", "pydantic.field_validator"] [tool.pytest.ini_options] addopts = ["--durations=10"] asyncio_mode = "auto" -markers = [ - "allow_network: Allow network access for specific unit tests", -] +markers = ["allow_network: Allow network access for specific unit tests"] diff --git a/uv.lock b/uv.lock index 90b2832d8..11f396799 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -1774,7 +1774,6 @@ dependencies = [ { name = "fire" }, { name = "h11" }, { name = "httpx" }, - { name = "huggingface-hub" }, { name = "jinja2" }, { name = "jsonschema" }, { name = "llama-stack-client" }, @@ -1896,7 +1895,6 @@ requires-dist = [ { name = "fire" }, { name = "h11", specifier = ">=0.16.0" }, { name = "httpx" }, - { name = "huggingface-hub", specifier = ">=0.34.0,<1.0" }, { name = "jinja2", specifier = ">=3.1.6" }, { name = "jsonschema" }, { name = "llama-stack-client", specifier = ">=0.2.23" },