second checkpoint

2025-08-15 14:08:00 +00:00 · 2025-08-02 13:16:35 -07:00 · 2025-08-02 13:16:35 -07:00 · 645e55a450
commit 645e55a450
parent 67f19f76b2
4 changed files with 78 additions and 17 deletions
--- a/docs/source/distributions/k8s/install-prometheus.sh
+++ b/docs/source/distributions/k8s/install-prometheus.sh
@ -0,0 +1,38 @@
 #!/bin/bash
 # Install (or upgrade) prometheus-community/kube-prometheus-stack using Helm.
 #
 # Calls `helm` and `kubectl` against whatever cluster the current context
 # points to. The script is written to be re-runnable: the repository, the
 # namespace and the release are each created only when missing, and an
 # existing release is upgraded in place instead of aborting the script.
 # Exit immediately if a command exits with a non-zero status
 set -e
 # Add the Prometheus community Helm repository if it doesn't exist.
 # The match is anchored to the NAME column so that a differently named repo
 # whose URL merely contains "prometheus-community" is not mistaken for it.
 # stderr is silenced because `helm repo list` reports an error when no
 # repositories are configured yet.
 if ! helm repo list 2> /dev/null | grep -q '^prometheus-community[[:space:]]'; then
  echo "Adding prometheus-community Helm repository..."
  helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
 fi
 # Update Helm repositories
 echo "Updating Helm repositories..."
 helm repo update
 # Create namespace for monitoring if it doesn't exist
 if ! kubectl get namespace monitoring &> /dev/null; then
  echo "Creating monitoring namespace..."
  kubectl create namespace monitoring
 fi
 # Install kube-prometheus-stack.
 # `upgrade --install` installs the release when it is absent and upgrades it
 # otherwise; a plain `helm install` fails on a second run because the release
 # name "prometheus" is already in use.
 echo "Installing kube-prometheus-stack..."
 helm upgrade --install prometheus prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --set grafana.enabled=true \
  --set prometheus.enabled=true \
  --set alertmanager.enabled=true \
  --set prometheus.service.type=ClusterIP \
  --set grafana.service.type=ClusterIP \
  --set alertmanager.service.type=ClusterIP \
  --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false \
  --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false
 echo "kube-prometheus-stack has been installed successfully!"
 echo "To access Grafana UI, run: kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80"
 echo "Default Grafana credentials - Username: admin, Password: prom-operator"
--- a/docs/source/distributions/k8s/monitoring-k8s.yaml.template
+++ b/docs/source/distributions/k8s/monitoring-k8s.yaml.template
@ -3062,16 +3062,15 @@ spec:
            port:
              number: 9090
 ---
-# NVIDIA DCGM Exporter Deployment for GPU metrics
+# NVIDIA DCGM Exporter DaemonSet for GPU metrics
 apiVersion: apps/v1
-kind: Deployment
+kind: DaemonSet
 metadata:
  name: dcgm-exporter
  namespace: monitoring
  labels:
    app: dcgm-exporter
 spec:
-  replicas: 1
  selector:
    matchLabels:
      app: dcgm-exporter
@ -3082,7 +3081,7 @@ spec:
    spec:
      containers:
      - name: dcgm-exporter
-        image: nvcr.io/nvidia/k8s/dcgm-exporter:4.2.3-4.3.0-ubuntu22.04
+        image: nvidia/dcgm-exporter:3.2.5-3.1.7-ubuntu20.04
        securityContext:
          runAsNonRoot: false
          runAsUser: 0
@ -3093,17 +3092,19 @@ spec:
        - -f
        - /etc/dcgm-exporter/dcp-metrics-included.csv
        volumeMounts:
        - name: device-metrics
          mountPath: /dev/metrics
        - name: dcgm-config
          mountPath: /etc/dcgm-exporter
      volumes:
      - name: device-metrics
        hostPath:
          path: /dev/metrics
      - name: dcgm-config
        configMap:
          name: dcgm-config
      nodeSelector:
        kubernetes.io/os: linux
        nvidia.com/gpu.present: "true"
      tolerations:
      - key: nvidia.com/gpu
        operator: Exists
        effect: NoSchedule
 ---
 # DCGM Exporter ConfigMap for metrics configuration
 apiVersion: v1
--- a/docs/source/distributions/k8s/ollama-safety-k8s.yaml.template
+++ b/docs/source/distributions/k8s/ollama-safety-k8s.yaml.template
@ -63,7 +63,7 @@ spec:
 apiVersion: v1
 kind: Service
 metadata:
-  name: ollama-server-safety
+  name: ollama-safety
 spec:
  selector:
    app.kubernetes.io/name: ollama-safety
--- a/docs/source/distributions/k8s/stack-k8s.yaml.template
+++ b/docs/source/distributions/k8s/stack-k8s.yaml.template
@ -26,12 +26,34 @@ spec:
        app.kubernetes.io/component: server
    spec:
      initContainers:
-      - name: wait-for-vllm-server
+      - name: wait-for-services
        image: busybox:1.28
-        command: ['sh', '-c', 'until nc -z vllm-server.default.svc.cluster.local 8001; do echo waiting for vllm-server on port 8001; sleep 2; done;']
+        command: ['sh', '-c', '
-      - name: wait-for-llm-nim-code
+          echo "Waiting for all required services to be ready...";
-        image: busybox:1.28
+
-        command: ['sh', '-c', 'until nc -z llm-nim-code.default.svc.cluster.local 8000; do echo waiting for llm-nim-code on port 8000; sleep 2; done;']
+          echo "Checking vllm-server...";
          until nc -z vllm-server.default.svc.cluster.local 8001; do
            echo "waiting for vllm-server on port 8001";
            sleep 2;
          done;
          echo "vllm-server is ready!";
          echo "Checking llm-nim-code...";
          until nc -z llm-nim-code.default.svc.cluster.local 8000; do
            echo "waiting for llm-nim-code on port 8000";
            sleep 2;
          done;
          echo "llm-nim-code is ready!";
          echo "Checking ollama-safety...";
          until nc -z ollama-safety.default.svc.cluster.local 11434; do
            echo "waiting for ollama-safety on port 11434";
            sleep 2;
          done;
          echo "ollama-safety is ready!";
          echo "All services are ready!";
        ']
      containers:
      - name: llama-stack
        image: llamastack/distribution-starter:0.2.15
@ -56,7 +78,7 @@ spec:
        - name: NVIDIA_BASE_URL
          value: http://llm-nim-code.default.svc.cluster.local:8000
        - name: OLLAMA_BASE_URL
-          value: http://ollama-safety.default.svc.cluster.local:8000
+          value: http://ollama-safety.default.svc.cluster.local:11434
        - name: POSTGRES_HOST
          value: postgres-server.default.svc.cluster.local
        - name: POSTGRES_PORT
@ -69,7 +91,7 @@ spec:
          value: "${CODE_MODEL}"
        - name: TAVILY_SEARCH_API_KEY
          value: "${TAVILY_SEARCH_API_KEY}"
-        - name: OLLAMA_MODLE
+        - name: OLLAMA_MODEL
          value: "${OLLAMA_MODEL}"
        command: ["/bin/sh"]
        args: