mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-23 16:37:28 +00:00
Merge remote-tracking branch 'origin/main' into stores
This commit is contained in:
commit
b72154ce5e
1161 changed files with 609896 additions and 42960 deletions
|
@ -29,31 +29,7 @@ The following environment variables can be configured:
|
|||
|
||||
## Prerequisite: Downloading Models
|
||||
|
||||
Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
|
||||
|
||||
```
|
||||
$ llama model list --downloaded
|
||||
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓
|
||||
┃ Model ┃ Size ┃ Modified Time ┃
|
||||
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩
|
||||
│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │
|
||||
├─────────────────────────────────────────┼──────────┼─────────────────────┤
|
||||
│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │
|
||||
├─────────────────────────────────────────┼──────────┼─────────────────────┤
|
||||
│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │
|
||||
├─────────────────────────────────────────┼──────────┼─────────────────────┤
|
||||
│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │
|
||||
├─────────────────────────────────────────┼──────────┼─────────────────────┤
|
||||
│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │
|
||||
├─────────────────────────────────────────┼──────────┼─────────────────────┤
|
||||
│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │
|
||||
├─────────────────────────────────────────┼──────────┼─────────────────────┤
|
||||
│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │
|
||||
├─────────────────────────────────────────┼──────────┼─────────────────────┤
|
||||
│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │
|
||||
├─────────────────────────────────────────┼──────────┼─────────────────────┤
|
||||
│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │
|
||||
└─────────────────────────────────────────┴──────────┴─────────────────────┘
|
||||
Please check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) to download the models using the Hugging Face CLI.
|
||||
```
|
||||
|
||||
## Running the Distribution
|
||||
|
@ -72,9 +48,9 @@ docker run \
|
|||
--gpus all \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ~/.llama:/root/.llama \
|
||||
-e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
llamastack/distribution-{{ name }} \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
@ -86,10 +62,10 @@ docker run \
|
|||
--gpus all \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ~/.llama:/root/.llama \
|
||||
-e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
-e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
|
||||
llamastack/distribution-{{ name }} \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
### Via venv
|
||||
|
@ -98,16 +74,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
|
|||
|
||||
```bash
|
||||
llama stack build --distro {{ name }} --image-type venv
|
||||
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
llama stack run distributions/{{ name }}/run.yaml \
|
||||
--port 8321 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
||||
--port 8321
|
||||
```
|
||||
|
||||
If you are using Llama Stack Safety / Shield APIs, use:
|
||||
|
||||
```bash
|
||||
INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
|
||||
llama stack run distributions/{{ name }}/run-with-safety.yaml \
|
||||
--port 8321 \
|
||||
--env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
|
||||
--env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
|
||||
--port 8321
|
||||
```
|
||||
|
|
|
@ -114,6 +114,9 @@ metadata_store:
|
|||
inference_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/inference_store.db
|
||||
conversations_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/conversations.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue