Adding docker-compose.yaml, starting to simplify

2025-12-17 13:32:35 +00:00 · 2024-11-16 10:56:38 -08:00 · 2024-11-16 10:56:38 -08:00 · f38e76ee98
commit f38e76ee98
parent e4509cb568
14 changed files with 516 additions and 386 deletions
--- a/llama_stack/providers/remote/inference/vllm/docker_compose.yaml
+++ b/llama_stack/providers/remote/inference/vllm/docker_compose.yaml
@ -0,0 +1,26 @@
+# Single-service Compose stack that runs the vllm/vllm-openai image with
+# NVIDIA GPU access.
+#
+# Environment variables read through Compose interpolation:
+#   VLLM_PORT             published port, also passed to --port (default 5100)
+#   VLLM_MODEL            value passed to --model
+#                         (default meta-llama/Llama-3.2-3B-Instruct)
+#   CUDA_VISIBLE_DEVICES  forwarded into the container (default 0)
+#   HF_TOKEN              forwarded as HUGGING_FACE_HUB_TOKEN (no default)
+services:
+  # Compose interpolates values only, never mapping keys, so the service name
+  # has to be a literal; `${SERVICE_NAME:-vllm}` as a key is taken verbatim
+  # and is not a valid service name. Use `docker compose -p <project>` to run
+  # several independent copies of this stack.
+  vllm:
+    image: vllm/vllm-openai:latest
+    ports:
+      # HOST:CONTAINER, both sides driven by the same variable so the
+      # published port always matches the --port argument below. Quoted per
+      # the Compose convention for port mappings.
+      - "${VLLM_PORT:-5100}:${VLLM_PORT:-5100}"
+    volumes:
+      # Mount the host's Hugging Face cache directory into the container.
+      - $HOME/.cache/huggingface:/root/.cache/huggingface
+    # NOTE(review): GPU access is requested three ways below (device entry,
+    # deploy reservation, nvidia runtime) - presumably for compatibility
+    # across container runtimes; confirm which of them are actually required.
+    devices:
+      - nvidia.com/gpu=all
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities: [gpu]
+    runtime: nvidia
+    environment:
+      - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}
+      - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
+    # Folded block scalar: the lines are joined with single spaces into one
+    # argument string; the `-` chomping indicator drops the trailing newline.
+    command: >-
+      --gpu-memory-utilization 0.75
+      --model ${VLLM_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
+      --enforce-eager
+      --max-model-len 8192
+      --max-num-seqs 16
+      --port ${VLLM_PORT:-5100}