mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-02 00:34:44 +00:00
not working grafana
This commit is contained in:
parent
2589bf962e
commit
a2bbb17fdd
6 changed files with 2424 additions and 0 deletions
|
@ -88,6 +88,19 @@ if [ "$USE_EBS" = "true" ]; then
|
|||
envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
|
||||
envsubst < ./chroma-k8s.yaml.template | kubectl apply -f -
|
||||
|
||||
|
||||
# Create monitoring namespace
|
||||
kubectl create namespace monitoring --dry-run=client -o yaml | kubectl apply -f -
|
||||
|
||||
# Install Prometheus Operator CRDs
|
||||
./install-prometheus-operator.sh
|
||||
|
||||
# Apply RBAC for Prometheus
|
||||
kubectl apply -f ./prometheus-rbac.yaml
|
||||
|
||||
# Apply monitoring resources after CRDs are installed
|
||||
envsubst < ./monitoring-k8s.yaml.template | kubectl apply -f -
|
||||
|
||||
kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
|
||||
--dry-run=client -o yaml > stack-configmap.yaml
|
||||
|
||||
|
@ -105,6 +118,16 @@ else
|
|||
envsubst < ./postgres-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
||||
envsubst < ./chroma-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
||||
|
||||
# Create monitoring namespace
|
||||
kubectl create namespace monitoring --dry-run=client -o yaml | kubectl apply -f -
|
||||
|
||||
|
||||
# Apply RBAC for Prometheus
|
||||
kubectl apply -f ./prometheus-rbac.yaml
|
||||
|
||||
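# Apply monitoring resources (NOTE(review): unlike the USE_EBS branch, no ./install-prometheus-operator.sh step is visible in this branch — confirm the Prometheus Operator CRDs are installed before this runs)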
|
||||
envsubst < ./monitoring-k8s.yaml.template | kubectl apply -f -
|
||||
|
||||
kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
|
||||
--dry-run=client -o yaml > stack-configmap.yaml
|
||||
|
||||
|
|
|
@ -37,6 +37,14 @@ set -x
|
|||
# Delete UI deployment
|
||||
envsubst < ./ui-k8s.yaml.template | kubectl delete -f - --ignore-not-found=true
|
||||
|
||||
# Delete monitoring resources
|
||||
envsubst < ./monitoring-k8s.yaml.template | kubectl delete -f - --ignore-not-found=true
|
||||
|
||||
# Delete Prometheus RBAC resources
|
||||
kubectl delete -f ./prometheus-rbac.yaml --ignore-not-found=true
|
||||
|
||||
|
||||
|
||||
# Delete ingress
|
||||
envsubst < ./ingress-k8s.yaml.template | kubectl delete -f - --ignore-not-found=true
|
||||
|
||||
|
|
|
@ -17,6 +17,11 @@ spec:
|
|||
metadata:
|
||||
labels:
|
||||
app: llm-nim-code
|
||||
nim-type: llama-nim
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
prometheus.io/port: '8000'
|
||||
prometheus.io/path: '/v1/metrics'
|
||||
spec:
|
||||
imagePullSecrets:
|
||||
- name: ngc-docker-registry # docker-registry secret: nvcr.io / $oauthtoken / <NGC_DOCKER_API_KEY>
|
||||
|
@ -42,6 +47,12 @@ spec:
|
|||
secretKeyRef:
|
||||
name: ngc-api
|
||||
key: NGC_API_KEY
|
||||
- name: NVIDIA_VISIBLE_DEVICES
|
||||
value: "all"
|
||||
- name: NVIDIA_DRIVER_CAPABILITIES
|
||||
value: "compute,utility"
|
||||
- name: ENABLE_GPU_METRICS
|
||||
value: "true"
|
||||
volumeMounts:
|
||||
- name: model-cache
|
||||
mountPath: /models # default NIM cache path
|
||||
|
|
2334
docs/source/distributions/k8s/monitoring-k8s.yaml.template
Normal file
2334
docs/source/distributions/k8s/monitoring-k8s.yaml.template
Normal file
File diff suppressed because it is too large
Load diff
41
docs/source/distributions/k8s/prometheus-rbac.yaml
Normal file
41
docs/source/distributions/k8s/prometheus-rbac.yaml
Normal file
|
@ -0,0 +1,41 @@
|
|||
---
# ServiceAccount in the `monitoring` namespace. The ClusterRoleBinding at the
# bottom of this file attaches the `prometheus` ClusterRole to it.
# NOTE(review): presumably referenced by the Prometheus workload defined in
# monitoring-k8s.yaml.template — confirm against that template.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
# Cluster-scoped role; every rule below grants read-style verbs only
# (get/list/watch, or get for the non-resource URL).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Core API group (written as the empty string).
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # Ingress objects in the `extensions` and `networking.k8s.io` API groups.
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  # Non-resource URL rule: allow GET on the /metrics path.
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get
---
# Grants the `prometheus` ClusterRole to the `prometheus` ServiceAccount in
# the `monitoring` namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitoring
|
|
@ -23,7 +23,12 @@ spec:
|
|||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: vllm
|
||||
app: vllm
|
||||
workload-type: inference
|
||||
annotations:
|
||||
prometheus.io/scrape: 'true'
|
||||
prometheus.io/port: '8001'
|
||||
prometheus.io/path: '/metrics'
|
||||
spec:
|
||||
# Removed nodeSelector for GPU nodes as they don't appear to exist in the cluster
|
||||
# If you have GPU nodes with a different label, you can uncomment and modify this section
|
||||
|
@ -45,6 +50,7 @@ spec:
|
|||
key: token
|
||||
ports:
|
||||
- containerPort: 8001
|
||||
name: http
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
|
@ -69,4 +75,5 @@ spec:
|
|||
- protocol: TCP
|
||||
port: 8001
|
||||
targetPort: 8001
|
||||
name: http
|
||||
type: ClusterIP
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue