add more distro templates (#279)

* verify dockers

* together distro verified

* readme

* fireworks distro

* fireworks compose up

* fireworks verified
This commit is contained in:
Xi Yan 2024-10-21 18:15:08 -07:00 committed by GitHub
parent cf27d19dd5
commit 4d2bd2d39e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 265 additions and 42 deletions

View file

@ -6,28 +6,7 @@ services:
- $HOME/.cache/huggingface:/data
ports:
- "5009:5009"
devices:
- nvidia.com/gpu=all
environment:
- CUDA_VISIBLE_DEVICES=0
- HF_HOME=/data
- HF_DATASETS_CACHE=/data
- HF_MODULES_CACHE=/data
- HF_HUB_CACHE=/data
command: ["--dtype", "bfloat16", "--usage-stats", "on", "--sharded", "false", "--model-id", "meta-llama/Llama-3.1-8B-Instruct", "--port", "5009", "--cuda-memory-fraction", "0.3"]
deploy:
resources:
reservations:
devices:
- driver: nvidia
# count is the closest analogue to --gpus; provide
# an integer number of devices or 'all'
count: 1
# Devices are reserved using a list of capabilities, making
# capabilities the only required field. A device MUST
# satisfy all the requested capabilities for a successful
# reservation.
capabilities: [gpu]
runtime: nvidia
healthcheck:
test: ["CMD", "curl", "-f", "http://text-generation-inference:5009/health"]