mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 14:08:00 +00:00
second checkpoint
This commit is contained in:
parent
67f19f76b2
commit
645e55a450
4 changed files with 78 additions and 17 deletions
38
docs/source/distributions/k8s/install-prometheus.sh
Normal file
38
docs/source/distributions/k8s/install-prometheus.sh
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
#!/bin/bash
#
# Install (or upgrade) prometheus-community/kube-prometheus-stack using Helm.
#
# The script is safe to re-run:
#   * the Helm repository is only added when it is not registered yet,
#   * the "monitoring" namespace is only created when it is missing,
#   * the release is applied with `helm upgrade --install`, so an existing
#     "prometheus" release is upgraded in place instead of making the
#     script fail with "cannot re-use a name that is still in use".
#
# Requirements: helm and kubectl on PATH, with a kube context pointing at
# the target cluster.

# Exit on any command failure, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail

# Fail early with a readable message if a required tool is missing.
for tool in helm kubectl; do
  if ! command -v "${tool}" > /dev/null 2>&1; then
    echo "Error: '${tool}' is required but was not found in PATH." >&2
    exit 1
  fi
done

# Add the Prometheus community Helm repository if it doesn't exist.
# `helm repo list` exits non-zero when no repositories are configured, so its
# output is captured with a fallback instead of being piped straight into
# grep. The match is anchored on the NAME column so that a URL or an
# unrelated repository containing the same text does not count as a hit.
configured_repos="$(helm repo list 2> /dev/null || true)"
if ! grep -qE '^prometheus-community[[:space:]]' <<< "${configured_repos}"; then
  echo "Adding prometheus-community Helm repository..."
  helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
fi

# Update Helm repositories
echo "Updating Helm repositories..."
helm repo update

# Create namespace for monitoring if it doesn't exist
if ! kubectl get namespace monitoring &> /dev/null; then
  echo "Creating monitoring namespace..."
  kubectl create namespace monitoring
fi

# Install kube-prometheus-stack (upgrades the release if it already exists).
# The two *SelectorNilUsesHelmValues=false settings are passed through to the
# chart unchanged from the original script.
echo "Installing kube-prometheus-stack..."
helm upgrade --install prometheus prometheus-community/kube-prometheus-stack \
  --namespace monitoring \
  --set grafana.enabled=true \
  --set prometheus.enabled=true \
  --set alertmanager.enabled=true \
  --set prometheus.service.type=ClusterIP \
  --set grafana.service.type=ClusterIP \
  --set alertmanager.service.type=ClusterIP \
  --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false \
  --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false

echo "kube-prometheus-stack has been installed successfully!"
echo "To access Grafana UI, run: kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80"
# NOTE(review): the password below is assumed to be the chart's default
# grafana.adminPassword; confirm against the chart version being installed.
echo "Default Grafana credentials - Username: admin, Password: prom-operator"
|
|
@ -3062,16 +3062,15 @@ spec:
|
||||||
port:
|
port:
|
||||||
number: 9090
|
number: 9090
|
||||||
---
|
---
|
||||||
# NVIDIA DCGM Exporter Deployment for GPU metrics
|
# NVIDIA DCGM Exporter DaemonSet for GPU metrics
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: DaemonSet
|
||||||
metadata:
|
metadata:
|
||||||
name: dcgm-exporter
|
name: dcgm-exporter
|
||||||
namespace: monitoring
|
namespace: monitoring
|
||||||
labels:
|
labels:
|
||||||
app: dcgm-exporter
|
app: dcgm-exporter
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
app: dcgm-exporter
|
app: dcgm-exporter
|
||||||
|
@ -3082,7 +3081,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: dcgm-exporter
|
- name: dcgm-exporter
|
||||||
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.2.3-4.3.0-ubuntu22.04
|
image: nvidia/dcgm-exporter:3.2.5-3.1.7-ubuntu20.04
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsNonRoot: false
|
runAsNonRoot: false
|
||||||
runAsUser: 0
|
runAsUser: 0
|
||||||
|
@ -3093,17 +3092,19 @@ spec:
|
||||||
- -f
|
- -f
|
||||||
- /etc/dcgm-exporter/dcp-metrics-included.csv
|
- /etc/dcgm-exporter/dcp-metrics-included.csv
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: device-metrics
|
|
||||||
mountPath: /dev/metrics
|
|
||||||
- name: dcgm-config
|
- name: dcgm-config
|
||||||
mountPath: /etc/dcgm-exporter
|
mountPath: /etc/dcgm-exporter
|
||||||
volumes:
|
volumes:
|
||||||
- name: device-metrics
|
|
||||||
hostPath:
|
|
||||||
path: /dev/metrics
|
|
||||||
- name: dcgm-config
|
- name: dcgm-config
|
||||||
configMap:
|
configMap:
|
||||||
name: dcgm-config
|
name: dcgm-config
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/os: linux
|
||||||
|
nvidia.com/gpu.present: "true"
|
||||||
|
tolerations:
|
||||||
|
- key: nvidia.com/gpu
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
---
|
---
|
||||||
# DCGM Exporter ConfigMap for metrics configuration
|
# DCGM Exporter ConfigMap for metrics configuration
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
|
|
|
@ -63,7 +63,7 @@ spec:
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: ollama-server-safety
|
name: ollama-safety
|
||||||
spec:
|
spec:
|
||||||
selector:
|
selector:
|
||||||
app.kubernetes.io/name: ollama-safety
|
app.kubernetes.io/name: ollama-safety
|
||||||
|
|
|
@ -26,12 +26,34 @@ spec:
|
||||||
app.kubernetes.io/component: server
|
app.kubernetes.io/component: server
|
||||||
spec:
|
spec:
|
||||||
initContainers:
|
initContainers:
|
||||||
- name: wait-for-vllm-server
|
- name: wait-for-services
|
||||||
image: busybox:1.28
|
image: busybox:1.28
|
||||||
command: ['sh', '-c', 'until nc -z vllm-server.default.svc.cluster.local 8001; do echo waiting for vllm-server on port 8001; sleep 2; done;']
|
command: ['sh', '-c', '
|
||||||
- name: wait-for-llm-nim-code
|
echo "Waiting for all required services to be ready...";
|
||||||
image: busybox:1.28
|
|
||||||
command: ['sh', '-c', 'until nc -z llm-nim-code.default.svc.cluster.local 8000; do echo waiting for llm-nim-code on port 8000; sleep 2; done;']
|
echo "Checking vllm-server...";
|
||||||
|
until nc -z vllm-server.default.svc.cluster.local 8001; do
|
||||||
|
echo "waiting for vllm-server on port 8001";
|
||||||
|
sleep 2;
|
||||||
|
done;
|
||||||
|
echo "vllm-server is ready!";
|
||||||
|
|
||||||
|
echo "Checking llm-nim-code...";
|
||||||
|
until nc -z llm-nim-code.default.svc.cluster.local 8000; do
|
||||||
|
echo "waiting for llm-nim-code on port 8000";
|
||||||
|
sleep 2;
|
||||||
|
done;
|
||||||
|
echo "llm-nim-code is ready!";
|
||||||
|
|
||||||
|
echo "Checking ollama-safety...";
|
||||||
|
until nc -z ollama-safety.default.svc.cluster.local 11434; do
|
||||||
|
echo "waiting for ollama-safety on port 11434";
|
||||||
|
sleep 2;
|
||||||
|
done;
|
||||||
|
echo "ollama-safety is ready!";
|
||||||
|
|
||||||
|
echo "All services are ready!";
|
||||||
|
']
|
||||||
containers:
|
containers:
|
||||||
- name: llama-stack
|
- name: llama-stack
|
||||||
image: llamastack/distribution-starter:0.2.15
|
image: llamastack/distribution-starter:0.2.15
|
||||||
|
@ -56,7 +78,7 @@ spec:
|
||||||
- name: NVIDIA_BASE_URL
|
- name: NVIDIA_BASE_URL
|
||||||
value: http://llm-nim-code.default.svc.cluster.local:8000
|
value: http://llm-nim-code.default.svc.cluster.local:8000
|
||||||
- name: OLLAMA_BASE_URL
|
- name: OLLAMA_BASE_URL
|
||||||
value: http://ollama-safety.default.svc.cluster.local:8000
|
value: http://ollama-safety.default.svc.cluster.local:11434
|
||||||
- name: POSTGRES_HOST
|
- name: POSTGRES_HOST
|
||||||
value: postgres-server.default.svc.cluster.local
|
value: postgres-server.default.svc.cluster.local
|
||||||
- name: POSTGRES_PORT
|
- name: POSTGRES_PORT
|
||||||
|
@ -69,7 +91,7 @@ spec:
|
||||||
value: "${CODE_MODEL}"
|
value: "${CODE_MODEL}"
|
||||||
- name: TAVILY_SEARCH_API_KEY
|
- name: TAVILY_SEARCH_API_KEY
|
||||||
value: "${TAVILY_SEARCH_API_KEY}"
|
value: "${TAVILY_SEARCH_API_KEY}"
|
||||||
- name: OLLAMA_MODLE
|
- name: OLLAMA_MODEL
|
||||||
value: "${OLLAMA_MODEL}"
|
value: "${OLLAMA_MODEL}"
|
||||||
command: ["/bin/sh"]
|
command: ["/bin/sh"]
|
||||||
args:
|
args:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue