# Using Conda now

# Previous venv setup, kept for reference (run from $STORAGE_DIR/llama-stack so the paths match):
python -m venv .venv
source $STORAGE_DIR/llama-stack/.venv/bin/activate

# Conda setup:
source ~/miniconda3/bin/activate
conda create --prefix ./envs python=3.10

source ~/miniconda3/bin/activate
conda activate ./envs
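
# Sanity check that the prefix env exists and is active:
conda info --envs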

pip install pre-commit
pre-commit install
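
# Optionally run all hooks once over the whole tree:
pre-commit run --all-files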

# Editable install of llama-stack from the repo checkout
pip install -e .

# Pin httpx below 0.28, which removed deprecated arguments and broke some clients
pip install httpx==0.27.2

huggingface-cli login

# Load environment variables from .env
export $(cat .env | xargs)
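
# Note: the xargs trick breaks on values containing spaces or quotes; a more
# robust alternative auto-exports everything while sourcing the file:
set -a; source .env; set +a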

# Env vars:
export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16"
export LLAMA_STACK_PORT=5001
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export INFERENCE_PORT=8000
export VLLM_URL=http://localhost:8000/v1
export SQLITE_STORE_DIR=$LLAMA_STACK_CONFIG_DIR/distributions/meta-reference-gpu
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
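
# Quick check that everything above is set:
env | grep -E 'OLLAMA|LLAMA_STACK|INFERENCE|VLLM|SQLITE|SAFETY'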


llama model download --model-id meta-llama/Llama-3.2-3B-Instruct
# Supply the signed download URL from the email when asked

# Meta reference GPU server (~/.llama is mounted so the container sees the downloaded checkpoints)
sudo docker run \
  -it \
  -v ~/.llama:/root/.llama \
  --gpus all \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  llamastack/distribution-meta-reference-gpu \
  --port $LLAMA_STACK_PORT \
  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
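
# Smoke test once the server is up (assumes llama-stack-client is installed: pip install llama-stack-client):
llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list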

# Fireworks server (host networking; run.yaml is mounted to supply the server config)
sudo docker run \
    -it \
    -v ~/run.yaml:/root/run.yaml \
    --net=host \
    llamastack/distribution-fireworks \
    --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
    --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY



llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT \
  inference chat-completion \
  --message "hello, what model are you?"

llama download --model-id Llama3.2-11B-Vision-Instruct
llama download --model-id Llama3.2-3B-Instruct
llama download --model-id Llama-Guard-3-1B
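
# Downloaded checkpoints land under ~/.llama:
ls ~/.llama/checkpoints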

# Build and run the meta-reference-gpu distribution in a conda image
llama stack build --template meta-reference-gpu --image-type conda && llama stack run distributions/meta-reference-gpu/run.yaml \
  --port 5001 \
  --env INFERENCE_MODEL=meta-llama/Llama-3.2-11B-Vision-Instruct

# Same, with the Llama Guard safety shield enabled (run-with-safety.yaml reads $SAFETY_MODEL)
llama stack build --template meta-reference-gpu --image-type conda && llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
  --port 5001 \
  --env INFERENCE_MODEL=meta-llama/Llama-3.2-11B-Vision-Instruct


# Inspect the SQLite stores
ls $SQLITE_STORE_DIR
sudo apt install sqlite3
# Faiss store
sqlite3 $SQLITE_STORE_DIR/faiss_store.db
.tables
.schema
.headers ON
.mode column
.output sql.txt
select key from kvstore;
select * from kvstore where key = 'memory_banks:v1::test_bank_2';
.output sql.txt
select * from kvstore where key = 'faiss_index:v1::test_bank_2';
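# Reset output back to the terminal
.output stdout

# The same queries also work non-interactively from the shell, e.g.:
sqlite3 -header -column $SQLITE_STORE_DIR/faiss_store.db "select key from kvstore;"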

# Registry
sqlite3 $SQLITE_STORE_DIR/registry.db
select key from kvstore;
select * from kvstore where key = 'distributions:registry:v2::model:meta-llama/Llama-3.2-11B-Vision-Instruct';

# Agent store
sqlite3 $SQLITE_STORE_DIR/agents_store.db
select key from kvstore;
# Key layout (see the examples below): agent:<agent_id>,
# session:<agent_id>:<session_id>, session:<agent_id>:<session_id>:<turn_id>
# Session
select * from kvstore where key = 'session:f4920b89-1035-4432-92ab-3d800878e28d:7b19e203-53cc-4295-b6cf-f0c400611ed1';
# Turns
.output sql.txt
select * from kvstore where key = 'session:f4920b89-1035-4432-92ab-3d800878e28d:7b19e203-53cc-4295-b6cf-f0c400611ed1:e38da75e-70fb-4895-b522-b25373f3e8d5';
# Agents
select * from kvstore where key = 'agent:f4920b89-1035-4432-92ab-3d800878e28d';


# Separate conda env for inspecting the Faiss index directly
conda create --prefix ./faiss-env python=3.10


source ~/miniconda3/bin/activate
conda activate ./faiss-env

pip install "numpy<2.0" faiss-gpu aiosqlite sentence-transformers
python inspect_faiss.py




# Install pytest
pip install pytest pytest-asyncio pypdf pytest-httpx

# Run tests
source ~/miniconda3/bin/activate && conda activate llamastack-meta-reference-gpu
pytest llama_stack/providers/tests/memory/test_vector_store.py -v
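
# pytest's -k filter narrows the run to matching tests (the pattern here is illustrative):
pytest llama_stack/providers/tests/memory/test_vector_store.py -v -k "overlap"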