Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-07-18 02:42:31 +00:00
make it work on gpus
This commit is contained in:
parent ee96c4891b
commit f99ca37f91
7 changed files with 30 additions and 21 deletions
@@ -26,16 +26,8 @@ spec:
         app.kubernetes.io/name: vllm-safety
         workload-type: inference
     spec:
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-          - labelSelector:
-              matchExpressions:
-              - key: workload-type
-                operator: In
-                values:
-                - inference
-            topologyKey: kubernetes.io/hostname # Ensures no two inference pods on same node
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: gpu
       containers:
       - name: vllm-safety
        image: vllm/vllm-openai:latest
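In this hunk the vllm-safety pod template drops the hard pod anti-affinity rule (which kept inference pods on separate nodes) and instead pins scheduling to the EKS GPU node group via a nodeSelector. A minimal sketch of how the resulting pod spec might read after this change is below; the replica count, container port, and the nvidia.com/gpu resource request are illustrative assumptions and are not part of this diff.

apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-safety
spec:
  replicas: 1                              # assumption; not shown in this hunk
  selector:
    matchLabels:
      app.kubernetes.io/name: vllm-safety
  template:
    metadata:
      labels:
        app.kubernetes.io/name: vllm-safety
        workload-type: inference
    spec:
      nodeSelector:
        eks.amazonaws.com/nodegroup: gpu   # schedule only onto the GPU node group
      containers:
      - name: vllm-safety
        image: vllm/vllm-openai:latest
        ports:
        - containerPort: 8000              # assumption: vLLM OpenAI server default port
        resources:
          limits:
            nvidia.com/gpu: "1"            # assumption: one GPU via the NVIDIA device plugin

The trade-off in this change is that placement now relies on the GPU node group rather than on anti-affinity to spread inference pods, so two inference pods may land on the same GPU node if it has capacity.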