add more distro templates (#279)

* verify dockers
* together distro verified
* readme
* fireworks distro
* fireworks compose up
* fireworks verified
2025-12-06 02:30:58 +00:00 · 2024-10-21 18:15:08 -07:00 · 2024-10-21 18:15:08 -07:00 · 4d2bd2d39e
commit 4d2bd2d39e
parent cf27d19dd5
18 changed files with 265 additions and 42 deletions
--- a/distributions/tgi/cpu/compose.yaml
+++ b/distributions/tgi/cpu/compose.yaml
@@ -6,28 +6,7 @@ services:
      - $HOME/.cache/huggingface:/data
    ports:
      - "5009:5009"
-    devices:
-      - nvidia.com/gpu=all
-    environment:
-      - CUDA_VISIBLE_DEVICES=0
-      - HF_HOME=/data
-      - HF_DATASETS_CACHE=/data
-      - HF_MODULES_CACHE=/data
-      - HF_HUB_CACHE=/data
    command: ["--dtype", "bfloat16", "--usage-stats", "on", "--sharded", "false", "--model-id", "meta-llama/Llama-3.1-8B-Instruct", "--port", "5009", "--cuda-memory-fraction", "0.3"]
-    deploy:
-      resources:
-        reservations:
-          devices:
-          - driver: nvidia
-            # that's the closest analogue to --gpus; provide
-            # an integer amount of devices or 'all'
-            count: 1
-            # Devices are reserved using a list of capabilities, making
-            # capabilities the only required field. A device MUST
-            # satisfy all the requested capabilities for a successful
-            # reservation.
-            capabilities: [gpu]
    runtime: nvidia
    healthcheck:
      test: ["CMD", "curl", "-f", "http://text-generation-inference:5009/health"]