Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-01 16:24:44 +00:00)
commit 048a030ed8 (parent 2fc1c16d58)
2 changed files with 148 additions and 0 deletions
.gitignore (vendored) | 3 additions

@@ -18,3 +18,6 @@ Package.resolved
 .vscode
 _build
 docs/src
+.envs/
+faiss-env/
+.envrc

COMMANDS.md (new file) | 145 additions

@@ -0,0 +1,145 @@
```bash

# venv setup:
python -m venv .venv
source $STORAGE_DIR/llama-stack/.venv/bin/activate

# Using Conda now:
source ~/miniconda3/bin/activate
conda create --prefix ./envs python=3.10

source ~/miniconda3/bin/activate
conda activate ./envs

pip install -e .

huggingface-cli login

# Load secrets and machine-specific paths from .env
export $(cat .env | xargs)
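
# The .env file itself is not included in the repo; a hypothetical example of what
# the commands below expect from it (placeholder values, adjust to your setup):
#   STORAGE_DIR=/mnt/storage
#   LLAMA_STACK_CONFIG_DIR=/root/.llama
#   FIREWORKS_API_KEY=fw-xxxxxxxx
# Note: `export $(cat .env | xargs)` only handles simple KEY=VALUE lines without spaces.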

# Env vars:
export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16"
export LLAMA_STACK_PORT=5001
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export INFERENCE_PORT=8000
export VLLM_URL=http://localhost:8000/v1
export SQLITE_STORE_DIR=$LLAMA_STACK_CONFIG_DIR/distributions/meta-reference-gpu
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

# vLLM server
export $(cat .env | xargs)
sudo docker run --gpus all \
    -v $STORAGE_DIR/.cache/huggingface:/root/.cache/huggingface \
    --env "HUGGING_FACE_HUB_TOKEN=$(cat ~/.cache/huggingface/token)" \
    -p 8000:$INFERENCE_PORT \
    --ipc=host \
    --net=host \
    vllm/vllm-openai:v0.6.3.post1 \
    --model $INFERENCE_MODEL
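
# Quick sanity check (sketch): the vLLM container exposes an OpenAI-compatible API,
# so plain curl can confirm the model is being served.
curl http://localhost:$INFERENCE_PORT/v1/models
curl http://localhost:$INFERENCE_PORT/v1/completions \
    -H "Content-Type: application/json" \
    -d "{\"model\": \"$INFERENCE_MODEL\", \"prompt\": \"Hello\", \"max_tokens\": 16}"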

# Remote vLLM
export $(cat .env | xargs)
sudo docker run \
    -it \
    --net=host \
    -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
    -v ./run.yaml:/root/my-run.yaml \
    llamastack/distribution-remote-vllm:0.0.54 \
    --yaml-config /root/my-run.yaml \
    --port $LLAMA_STACK_PORT \
    --env INFERENCE_MODEL=$INFERENCE_MODEL \
    --env VLLM_URL=http://localhost:$INFERENCE_PORT/v1
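
# Once the distribution is up, the client CLI can confirm the model is registered
# (sketch; assumes llama-stack-client is installed on the host):
llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list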

llama model download --model-id meta-llama/Llama-3.2-3B-Instruct
# Paste in the signed URL from the email when prompted

# Meta reference gpu server
export $(cat .env | xargs)
sudo docker run \
    -it \
    -v ~/.llama:/root/.llama \
    --gpus all \
    -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
    llamastack/distribution-meta-reference-gpu \
    --port $LLAMA_STACK_PORT \
    --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
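
# Note: this server loads checkpoints from the mounted ~/.llama directory, so the
# `llama download ...` commands further down need to have been run first.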

# Fireworks server
sudo docker run \
    -it \
    -v ~/run.yaml:/root/run.yaml \
    --net=host \
    llamastack/distribution-fireworks \
    --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
    --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY
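
# The Fireworks distribution sends inference to the hosted Fireworks AI API, so it
# needs FIREWORKS_API_KEY (e.g. from .env above) rather than a local GPU.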


llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT inference chat-completion --message "hello, what model are you?"


# Install the stack
llama stack build --template remote-vllm --image-type conda
# Run the stack
conda activate llamastack-remote-vllm
llama stack run run.yaml \
    --port 5001 \
    --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct

llama stack build --template meta-reference-gpu --image-type conda && llama stack run distributions/meta-reference-gpu/run.yaml \
    --port 5001 \
    --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct

llama stack build --template meta-reference-gpu --image-type conda && llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
    --port 5001 \
    --env INFERENCE_MODEL=meta-llama/Llama-3.2-11B-Vision-Instruct

llama download --model-id Llama3.2-11B-Vision-Instruct
llama download --model-id Llama3.2-3B-Instruct
llama download --model-id Llama-Guard-3-1B

ls $SQLITE_STORE_DIR
sudo apt install sqlite3
# Faiss store (the dot commands and queries below run inside the sqlite3 prompt)
sqlite3 $SQLITE_STORE_DIR/faiss_store.db
.tables
.schema
.headers ON
.mode column
.output sql.txt
select key from kvstore;
select * from kvstore where key = 'memory_banks:v1::test_bank_2';
.output sql.txt
select * from kvstore where key = 'faiss_index:v1::test_bank_2';
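
# The faiss_index row is a large JSON blob; a non-interactive one-liner (sketch) to
# check its size without dumping it to the terminal:
sqlite3 $SQLITE_STORE_DIR/faiss_store.db "select key, length(value) from kvstore;"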

# Registry
sqlite3 $SQLITE_STORE_DIR/registry.db
select key from kvstore;
select * from kvstore where key = 'distributions:registry:v2::model:meta-llama/Llama-3.2-11B-Vision-Instruct';

# Agent store
sqlite3 $SQLITE_STORE_DIR/agents_store.db
select key from kvstore;
# Session
select * from kvstore where key = 'session:f4920b89-1035-4432-92ab-3d800878e28d:7b19e203-53cc-4295-b6cf-f0c400611ed1';
# Turns
.output sql.txt
select * from kvstore where key = 'session:f4920b89-1035-4432-92ab-3d800878e28d:7b19e203-53cc-4295-b6cf-f0c400611ed1:e38da75e-70fb-4895-b522-b25373f3e8d5';
# Agents
select * from kvstore where key = 'agent:f4920b89-1035-4432-92ab-3d800878e28d';
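
# Session and turn entries share the 'session:' key prefix, so a LIKE filter lists
# them together without knowing the UUIDs (sketch):
sqlite3 $SQLITE_STORE_DIR/agents_store.db "select key from kvstore where key like 'session:%';"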

conda create --prefix ./faiss-env python=3.10

source ~/miniconda3/bin/activate
conda activate ./faiss-env

pip install "numpy<2.0" faiss-gpu aiosqlite sentence-transformers
python inspect_faiss.py   # script not included in this commit; a sketch follows after this block
```
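
COMMANDS.md ends by running an `inspect_faiss.py` that is not part of this commit, so its contents are unknown. Below is a minimal sketch of a script that peeks at the same `faiss_store.db` kvstore at the JSON layer; the key layout (`faiss_index:v1::<bank>`) is taken from the queries above, while the field layout and whatever the real script does with faiss and sentence-transformers are assumptions to verify against the FAISS provider source.

```python
# inspect_faiss.py -- hypothetical sketch, NOT the script referenced above.
# It lists what the faiss kvstore holds; the exact serialization of the index
# itself is provider-internal, so this deliberately stops at the JSON layer.
import json
import os
import sqlite3

store_dir = os.environ["SQLITE_STORE_DIR"]  # same env var used in COMMANDS.md
db = sqlite3.connect(os.path.join(store_dir, "faiss_store.db"))

for key, value in db.execute("select key, value from kvstore"):
    size = len(value) if value is not None else 0
    print(f"{key}: size={size}")
    try:
        payload = json.loads(value)
    except (TypeError, ValueError):
        continue  # not JSON, skip
    if isinstance(payload, dict):
        # e.g. a 'faiss_index:v1::test_bank_2' entry is expected to hold the
        # serialized index plus chunk metadata (assumed field layout)
        print("  fields:", ", ".join(payload.keys()))
```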