Convert TGI

2025-12-18 15:49:49 +00:00 · 2024-11-17 14:49:41 -08:00 · 2024-11-17 14:49:41 -08:00 · 028530546f
commit 028530546f
parent 9bb07ce298
14 changed files with 485 additions and 160 deletions
--- a/llama_stack/providers/remote/inference/vllm/docker_compose.yaml
+++ b/llama_stack/providers/remote/inference/vllm/docker_compose.yaml
@@ -1,26 +0,0 @@
-services:
-  ${SERVICE_NAME:-vllm}:
-    image: vllm/vllm-openai:latest
-    ports:
-      - ${VLLM_PORT:-5100}:${VLLM_PORT:-5100}
-    volumes:
-      - $HOME/.cache/huggingface:/root/.cache/huggingface
-    devices:
-      - nvidia.com/gpu=all
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              capabilities: [gpu]
-    runtime: nvidia
-    environment:
-      - CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}
-      - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
-    command: >
-      --gpu-memory-utilization 0.75
-      --model ${VLLM_MODEL:-meta-llama/Llama-3.2-3B-Instruct}
-      --enforce-eager
-      --max-model-len 8192
-      --max-num-seqs 16
-      --port ${VLLM_PORT:-5100}