mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-15 06:00:48 +00:00
second checkpoint
This commit is contained in:
parent
67f19f76b2
commit
645e55a450
4 changed files with 78 additions and 17 deletions
38
docs/source/distributions/k8s/install-prometheus.sh
Normal file
38
docs/source/distributions/k8s/install-prometheus.sh
Normal file
|
@ -0,0 +1,38 @@
|
|||
#!/bin/bash
# Install (or upgrade) prometheus-community/kube-prometheus-stack using Helm.
#
# The script is safe to re-run: the Helm repository and the namespace are only
# created when missing, and the release is applied with `helm upgrade --install`
# so an existing release is upgraded instead of causing a failure.
#
# Requirements: `helm` and `kubectl` on PATH, with a kubeconfig context that
# points at the target cluster.

# Exit on command failure, on use of an unset variable, and on a failure
# anywhere in a pipeline.
set -euo pipefail

# Fail early with a clear message if a required CLI is missing.
for tool in helm kubectl; do
    if ! command -v "$tool" > /dev/null 2>&1; then
        echo "Error: '$tool' is required but was not found in PATH." >&2
        exit 1
    fi
done

# Add the Prometheus community Helm repository if it doesn't exist.
# `helm repo list` exits non-zero when no repositories are configured, so its
# failure is tolerated here. The match is anchored to the NAME column so that a
# repository registered under a different alias (whose URL merely contains
# "prometheus-community") does not suppress the add.
configured_repos="$(helm repo list 2> /dev/null || true)"
if ! grep -q "^prometheus-community[[:space:]]" <<< "$configured_repos"; then
    echo "Adding prometheus-community Helm repository..."
    helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
fi

# Update Helm repositories
echo "Updating Helm repositories..."
helm repo update

# Create namespace for monitoring if it doesn't exist
if ! kubectl get namespace monitoring &> /dev/null; then
    echo "Creating monitoring namespace..."
    kubectl create namespace monitoring
fi

# Install kube-prometheus-stack, or upgrade it if the release already exists.
# A plain `helm install` fails when the release name is already in use, which
# would abort the script on every run after the first.
# The *SelectorNilUsesHelmValues=false settings are passed so that the
# ServiceMonitor/PodMonitor selectors are not tied to this release's Helm
# values (NOTE(review): confirm against the chart's values documentation).
echo "Installing kube-prometheus-stack..."
helm upgrade --install prometheus prometheus-community/kube-prometheus-stack \
    --namespace monitoring \
    --set grafana.enabled=true \
    --set prometheus.enabled=true \
    --set alertmanager.enabled=true \
    --set prometheus.service.type=ClusterIP \
    --set grafana.service.type=ClusterIP \
    --set alertmanager.service.type=ClusterIP \
    --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false \
    --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false

echo "kube-prometheus-stack has been installed successfully!"
echo "To access Grafana UI, run: kubectl port-forward -n monitoring svc/prometheus-grafana 3000:80"
# NOTE(review): these are presumably the chart's default credentials; change
# them for anything beyond local experimentation.
echo "Default Grafana credentials - Username: admin, Password: prom-operator"
|
|
@ -3062,16 +3062,15 @@ spec:
|
|||
port:
|
||||
number: 9090
|
||||
---
|
||||
# NVIDIA DCGM Exporter Deployment for GPU metrics
|
||||
# NVIDIA DCGM Exporter DaemonSet for GPU metrics
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: dcgm-exporter
|
||||
namespace: monitoring
|
||||
labels:
|
||||
app: dcgm-exporter
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: dcgm-exporter
|
||||
|
@ -3082,7 +3081,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: dcgm-exporter
|
||||
image: nvcr.io/nvidia/k8s/dcgm-exporter:4.2.3-4.3.0-ubuntu22.04
|
||||
image: nvidia/dcgm-exporter:3.2.5-3.1.7-ubuntu20.04
|
||||
securityContext:
|
||||
runAsNonRoot: false
|
||||
runAsUser: 0
|
||||
|
@ -3093,17 +3092,19 @@ spec:
|
|||
- -f
|
||||
- /etc/dcgm-exporter/dcp-metrics-included.csv
|
||||
volumeMounts:
|
||||
- name: device-metrics
|
||||
mountPath: /dev/metrics
|
||||
- name: dcgm-config
|
||||
mountPath: /etc/dcgm-exporter
|
||||
volumes:
|
||||
- name: device-metrics
|
||||
hostPath:
|
||||
path: /dev/metrics
|
||||
- name: dcgm-config
|
||||
configMap:
|
||||
name: dcgm-config
|
||||
nodeSelector:
|
||||
kubernetes.io/os: linux
|
||||
nvidia.com/gpu.present: "true"
|
||||
tolerations:
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
---
|
||||
# DCGM Exporter ConfigMap for metrics configuration
|
||||
apiVersion: v1
|
||||
|
|
|
@ -63,7 +63,7 @@ spec:
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: ollama-server-safety
|
||||
name: ollama-safety
|
||||
spec:
|
||||
selector:
|
||||
app.kubernetes.io/name: ollama-safety
|
||||
|
|
|
@ -26,12 +26,34 @@ spec:
|
|||
app.kubernetes.io/component: server
|
||||
spec:
|
||||
initContainers:
|
||||
- name: wait-for-vllm-server
|
||||
- name: wait-for-services
|
||||
image: busybox:1.28
|
||||
command: ['sh', '-c', 'until nc -z vllm-server.default.svc.cluster.local 8001; do echo waiting for vllm-server on port 8001; sleep 2; done;']
|
||||
- name: wait-for-llm-nim-code
|
||||
image: busybox:1.28
|
||||
command: ['sh', '-c', 'until nc -z llm-nim-code.default.svc.cluster.local 8000; do echo waiting for llm-nim-code on port 8000; sleep 2; done;']
|
||||
command: ['sh', '-c', '
|
||||
echo "Waiting for all required services to be ready...";
|
||||
|
||||
echo "Checking vllm-server...";
|
||||
until nc -z vllm-server.default.svc.cluster.local 8001; do
|
||||
echo "waiting for vllm-server on port 8001";
|
||||
sleep 2;
|
||||
done;
|
||||
echo "vllm-server is ready!";
|
||||
|
||||
echo "Checking llm-nim-code...";
|
||||
until nc -z llm-nim-code.default.svc.cluster.local 8000; do
|
||||
echo "waiting for llm-nim-code on port 8000";
|
||||
sleep 2;
|
||||
done;
|
||||
echo "llm-nim-code is ready!";
|
||||
|
||||
echo "Checking ollama-safety...";
|
||||
until nc -z ollama-safety.default.svc.cluster.local 11434; do
|
||||
echo "waiting for ollama-safety on port 11434";
|
||||
sleep 2;
|
||||
done;
|
||||
echo "ollama-safety is ready!";
|
||||
|
||||
echo "All services are ready!";
|
||||
']
|
||||
containers:
|
||||
- name: llama-stack
|
||||
image: llamastack/distribution-starter:0.2.15
|
||||
|
@ -56,7 +78,7 @@ spec:
|
|||
- name: NVIDIA_BASE_URL
|
||||
value: http://llm-nim-code.default.svc.cluster.local:8000
|
||||
- name: OLLAMA_BASE_URL
|
||||
value: http://ollama-safety.default.svc.cluster.local:8000
|
||||
value: http://ollama-safety.default.svc.cluster.local:11434
|
||||
- name: POSTGRES_HOST
|
||||
value: postgres-server.default.svc.cluster.local
|
||||
- name: POSTGRES_PORT
|
||||
|
@ -69,7 +91,7 @@ spec:
|
|||
value: "${CODE_MODEL}"
|
||||
- name: TAVILY_SEARCH_API_KEY
|
||||
value: "${TAVILY_SEARCH_API_KEY}"
|
||||
- name: OLLAMA_MODLE
|
||||
- name: OLLAMA_MODEL
|
||||
value: "${OLLAMA_MODEL}"
|
||||
command: ["/bin/sh"]
|
||||
args:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue