Merge f99ca37f91 into 40fdce79b3

2025-06-27 18:50:41 +00:00 · 2025-06-27 11:39:51 +02:00 · 2025-06-27 11:39:51 +02:00 · 79147a554a
commit 79147a554a
parent 40fdce79b3 f99ca37f91
8 changed files with 36 additions and 27 deletions
--- a/docs/source/distributions/k8s/apply.sh
+++ b/docs/source/distributions/k8s/apply.sh
@ -16,6 +16,14 @@ export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
 set -euo pipefail
 set -x

+# Install NVIDIA device plugin for GPU support
+echo "Installing NVIDIA device plugin..."
+kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/refs/tags/v0.17.2/deployments/static/nvidia-device-plugin.yml
+
+# Wait for NVIDIA device plugin to be ready
+echo "Waiting for NVIDIA device plugin to be ready..."
+kubectl wait --for=condition=ready pod -l name=nvidia-device-plugin-ds -n kube-system --timeout=300s
+
 envsubst < ./vllm-k8s.yaml.template | kubectl apply -f -
 envsubst < ./vllm-safety-k8s.yaml.template | kubectl apply -f -
 envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
--- a/docs/source/distributions/k8s/chroma-k8s.yaml.template
+++ b/docs/source/distributions/k8s/chroma-k8s.yaml.template
@ -5,6 +5,7 @@ metadata:
 spec:
  accessModes:
    - ReadWriteOnce
+  storageClassName: gp2
  resources:
    requests:
      storage: 20Gi
@ -23,6 +24,8 @@ spec:
      labels:
        app: chromadb
    spec:
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: cpu
      containers:
      - name: chromadb
        image: chromadb/chroma:latest
--- a/docs/source/distributions/k8s/postgres-k8s.yaml.template
+++ b/docs/source/distributions/k8s/postgres-k8s.yaml.template
@ -5,6 +5,7 @@ metadata:
 spec:
  accessModes:
    - ReadWriteOnce
+  storageClassName: gp2
  resources:
    requests:
      storage: 10Gi
@ -23,6 +24,8 @@ spec:
      labels:
        app.kubernetes.io/name: postgres
    spec:
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: cpu
      containers:
      - name: postgres
        image: postgres:15
--- a/docs/source/distributions/k8s/stack-k8s.yaml.template
+++ b/docs/source/distributions/k8s/stack-k8s.yaml.template
@ -5,6 +5,7 @@ metadata:
 spec:
  accessModes:
    - ReadWriteOnce
+  storageClassName: gp2
  resources:
    requests:
      storage: 1Gi
@ -25,9 +26,11 @@ spec:
        app.kubernetes.io/name: llama-stack
        app.kubernetes.io/component: server
    spec:
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: cpu
      containers:
      - name: llama-stack
-        image: llamastack/distribution-remote-vllm:latest
+        image: llamastack/distribution-postgres-demo:latest
        imagePullPolicy: Always # since we have specified latest instead of a version
        env:
        - name: ENABLE_CHROMADB
--- a/docs/source/distributions/k8s/ui-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ui-k8s.yaml.template
@ -17,6 +17,8 @@ spec:
        app.kubernetes.io/name: llama-stack
        app.kubernetes.io/component: ui
    spec:
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: cpu
      containers:
      - name: llama-stack-ui
        image: node:18-alpine
--- a/docs/source/distributions/k8s/vllm-k8s.yaml.template
+++ b/docs/source/distributions/k8s/vllm-k8s.yaml.template
@ -6,6 +6,7 @@ spec:
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
+  storageClassName: gp2
  resources:
    requests:
      storage: 50Gi
@ -25,16 +26,8 @@ spec:
        app.kubernetes.io/name: vllm
        workload-type: inference
    spec:
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-          - labelSelector:
-              matchExpressions:
-              - key: workload-type
-                operator: In
-                values:
-                - inference
-            topologyKey: kubernetes.io/hostname  # Ensures no two inference pods on same node
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: gpu
      containers:
      - name: vllm
        image: vllm/vllm-openai:latest
@ -49,6 +42,11 @@ spec:
              key: token
        ports:
          - containerPort: 8000
+        resources:
+          requests:
+            nvidia.com/gpu: 1
+          limits:
+            nvidia.com/gpu: 1
        volumeMounts:
          - name: llama-storage
            mountPath: /root/.cache/huggingface
--- a/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template
+++ b/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template
@ -26,16 +26,8 @@ spec:
        app.kubernetes.io/name: vllm-safety
        workload-type: inference
    spec:
-      affinity:
-        podAntiAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-          - labelSelector:
-              matchExpressions:
-              - key: workload-type
-                operator: In
-                values:
-                - inference
-            topologyKey: kubernetes.io/hostname  # Ensures no two inference pods on same node
+      nodeSelector:
+        eks.amazonaws.com/nodegroup: gpu
      containers:
      - name: vllm-safety
        image: vllm/vllm-openai:latest
--- a/llama_stack/distribution/store/registry.py
+++ b/llama_stack/distribution/store/registry.py
@ -14,7 +14,6 @@ from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWith
 from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl
-from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig

 logger = get_logger(__name__, category="core")

@ -193,12 +192,13 @@ async def create_dist_registry(
    image_name: str,
 ) -> tuple[CachedDiskDistributionRegistry, KVStore]:
    # instantiate kvstore for storing and retrieving distribution metadata
-    if metadata_store:
-        dist_kvstore = await kvstore_impl(metadata_store)
-    else:
-        dist_kvstore = await kvstore_impl(
-            SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix())
+    if not metadata_store:
+        metadata_store = KVStoreConfig(
+            type="sqlite",
+            namespace=None,
+            db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix()
        )
+    dist_kvstore = await kvstore_impl(metadata_store)
    dist_registry = CachedDiskDistributionRegistry(dist_kvstore)
    await dist_registry.initialize()
    return dist_registry, dist_kvstore