From f0170c5d3a735fd0413efa1837e799b3b86a4b7e Mon Sep 17 00:00:00 2001 From: Raghotham Murthy Date: Thu, 5 Jun 2025 02:03:21 -0700 Subject: [PATCH 1/2] chore: remove explicit sqlite dependency --- llama_stack/distribution/store/registry.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llama_stack/distribution/store/registry.py b/llama_stack/distribution/store/registry.py index 0e84854c2..36a86a921 100644 --- a/llama_stack/distribution/store/registry.py +++ b/llama_stack/distribution/store/registry.py @@ -14,7 +14,6 @@ from llama_stack.distribution.datatypes import KVStoreConfig, RoutableObjectWith from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl -from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig logger = get_logger(__name__, category="core") @@ -193,12 +192,13 @@ async def create_dist_registry( image_name: str, ) -> tuple[CachedDiskDistributionRegistry, KVStore]: # instantiate kvstore for storing and retrieving distribution metadata - if metadata_store: - dist_kvstore = await kvstore_impl(metadata_store) - else: - dist_kvstore = await kvstore_impl( - SqliteKVStoreConfig(db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix()) + if not metadata_store: + metadata_store = KVStoreConfig( + type="sqlite", + namespace=None, + db_path=(DISTRIBS_BASE_DIR / image_name / "kvstore.db").as_posix() ) + dist_kvstore = await kvstore_impl(metadata_store) dist_registry = CachedDiskDistributionRegistry(dist_kvstore) await dist_registry.initialize() return dist_registry, dist_kvstore From f99ca37f91d43d86c450ba15a33a7f5988befc56 Mon Sep 17 00:00:00 2001 From: Raghotham Murthy Date: Tue, 24 Jun 2025 12:20:04 -0700 Subject: [PATCH 2/2] make it work on gpus --- docs/source/distributions/k8s/apply.sh | 8 ++++++++ .../distributions/k8s/chroma-k8s.yaml.template | 3 +++ .../k8s/postgres-k8s.yaml.template | 3 +++ .../distributions/k8s/stack-k8s.yaml.template | 5 ++++- .../distributions/k8s/ui-k8s.yaml.template | 2 ++ .../distributions/k8s/vllm-k8s.yaml.template | 18 ++++++++---------- .../k8s/vllm-safety-k8s.yaml.template | 12 ++---------- 7 files changed, 30 insertions(+), 21 deletions(-) diff --git a/docs/source/distributions/k8s/apply.sh b/docs/source/distributions/k8s/apply.sh index 7ff7d28eb..b29e18d2a 100755 --- a/docs/source/distributions/k8s/apply.sh +++ b/docs/source/distributions/k8s/apply.sh @@ -16,6 +16,14 @@ export SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B} set -euo pipefail set -x +# Install NVIDIA device plugin for GPU support +echo "Installing NVIDIA device plugin..." +kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/refs/tags/v0.17.2/deployments/static/nvidia-device-plugin.yml + +# Wait for NVIDIA device plugin to be ready +echo "Waiting for NVIDIA device plugin to be ready..." +kubectl wait --for=condition=ready pod -l name=nvidia-device-plugin-ds -n kube-system --timeout=300s + envsubst < ./vllm-k8s.yaml.template | kubectl apply -f - envsubst < ./vllm-safety-k8s.yaml.template | kubectl apply -f - envsubst < ./postgres-k8s.yaml.template | kubectl apply -f - diff --git a/docs/source/distributions/k8s/chroma-k8s.yaml.template b/docs/source/distributions/k8s/chroma-k8s.yaml.template index a2a5e3be3..2083a566b 100644 --- a/docs/source/distributions/k8s/chroma-k8s.yaml.template +++ b/docs/source/distributions/k8s/chroma-k8s.yaml.template @@ -5,6 +5,7 @@ metadata: spec: accessModes: - ReadWriteOnce + storageClassName: gp2 resources: requests: storage: 20Gi @@ -23,6 +24,8 @@ spec: labels: app: chromadb spec: + nodeSelector: + eks.amazonaws.com/nodegroup: cpu containers: - name: chromadb image: chromadb/chroma:latest diff --git a/docs/source/distributions/k8s/postgres-k8s.yaml.template b/docs/source/distributions/k8s/postgres-k8s.yaml.template index 86a765652..66e197b15 100644 --- a/docs/source/distributions/k8s/postgres-k8s.yaml.template +++ b/docs/source/distributions/k8s/postgres-k8s.yaml.template @@ -5,6 +5,7 @@ metadata: spec: accessModes: - ReadWriteOnce + storageClassName: gp2 resources: requests: storage: 10Gi @@ -23,6 +24,8 @@ spec: labels: app.kubernetes.io/name: postgres spec: + nodeSelector: + eks.amazonaws.com/nodegroup: cpu containers: - name: postgres image: postgres:15 diff --git a/docs/source/distributions/k8s/stack-k8s.yaml.template b/docs/source/distributions/k8s/stack-k8s.yaml.template index 1cfc63ef5..44f69f69f 100644 --- a/docs/source/distributions/k8s/stack-k8s.yaml.template +++ b/docs/source/distributions/k8s/stack-k8s.yaml.template @@ -5,6 +5,7 @@ metadata: spec: accessModes: - ReadWriteOnce + storageClassName: gp2 resources: requests: storage: 1Gi @@ -25,9 +26,11 @@ spec: app.kubernetes.io/name: llama-stack app.kubernetes.io/component: server spec: + nodeSelector: + eks.amazonaws.com/nodegroup: cpu containers: - name: llama-stack - image: llamastack/distribution-remote-vllm:latest + image: llamastack/distribution-postgres-demo:latest imagePullPolicy: Always # since we have specified latest instead of a version env: - name: ENABLE_CHROMADB diff --git a/docs/source/distributions/k8s/ui-k8s.yaml.template b/docs/source/distributions/k8s/ui-k8s.yaml.template index ef1bf0c55..ca429c029 100644 --- a/docs/source/distributions/k8s/ui-k8s.yaml.template +++ b/docs/source/distributions/k8s/ui-k8s.yaml.template @@ -17,6 +17,8 @@ spec: app.kubernetes.io/name: llama-stack app.kubernetes.io/component: ui spec: + nodeSelector: + eks.amazonaws.com/nodegroup: cpu containers: - name: llama-stack-ui image: node:18-alpine diff --git a/docs/source/distributions/k8s/vllm-k8s.yaml.template b/docs/source/distributions/k8s/vllm-k8s.yaml.template index 6256cc7e1..3988066b2 100644 --- a/docs/source/distributions/k8s/vllm-k8s.yaml.template +++ b/docs/source/distributions/k8s/vllm-k8s.yaml.template @@ -6,6 +6,7 @@ spec: accessModes: - ReadWriteOnce volumeMode: Filesystem + storageClassName: gp2 resources: requests: storage: 50Gi @@ -25,16 +26,8 @@ spec: app.kubernetes.io/name: vllm workload-type: inference spec: - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: workload-type - operator: In - values: - - inference - topologyKey: kubernetes.io/hostname # Ensures no two inference pods on same node + nodeSelector: + eks.amazonaws.com/nodegroup: gpu containers: - name: vllm image: vllm/vllm-openai:latest @@ -49,6 +42,11 @@ spec: key: token ports: - containerPort: 8000 + resources: + requests: + nvidia.com/gpu: 1 + limits: + nvidia.com/gpu: 1 volumeMounts: - name: llama-storage mountPath: /root/.cache/huggingface diff --git a/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template b/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template index 8857e83b6..9bce4aa95 100644 --- a/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template +++ b/docs/source/distributions/k8s/vllm-safety-k8s.yaml.template @@ -26,16 +26,8 @@ spec: app.kubernetes.io/name: vllm-safety workload-type: inference spec: - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: workload-type - operator: In - values: - - inference - topologyKey: kubernetes.io/hostname # Ensures no two inference pods on same node + nodeSelector: + eks.amazonaws.com/nodegroup: gpu containers: - name: vllm-safety image: vllm/vllm-openai:latest