mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-02 08:44:44 +00:00
not working grafana
This commit is contained in:
parent
2589bf962e
commit
a2bbb17fdd
6 changed files with 2424 additions and 0 deletions
|
@ -88,6 +88,19 @@ if [ "$USE_EBS" = "true" ]; then
|
||||||
envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
|
envsubst < ./postgres-k8s.yaml.template | kubectl apply -f -
|
||||||
envsubst < ./chroma-k8s.yaml.template | kubectl apply -f -
|
envsubst < ./chroma-k8s.yaml.template | kubectl apply -f -
|
||||||
|
|
||||||
|
|
||||||
|
# Create monitoring namespace
|
||||||
|
kubectl create namespace monitoring --dry-run=client -o yaml | kubectl apply -f -
|
||||||
|
|
||||||
|
# Install Prometheus Operator CRDs
|
||||||
|
./install-prometheus-operator.sh
|
||||||
|
|
||||||
|
# Apply RBAC for Prometheus
|
||||||
|
kubectl apply -f ./prometheus-rbac.yaml
|
||||||
|
|
||||||
|
# Apply monitoring resources after CRDs are installed
|
||||||
|
envsubst < ./monitoring-k8s.yaml.template | kubectl apply -f -
|
||||||
|
|
||||||
kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
|
kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
|
||||||
--dry-run=client -o yaml > stack-configmap.yaml
|
--dry-run=client -o yaml > stack-configmap.yaml
|
||||||
|
|
||||||
|
@ -105,6 +118,16 @@ else
|
||||||
envsubst < ./postgres-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
envsubst < ./postgres-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
||||||
envsubst < ./chroma-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
envsubst < ./chroma-k8s.yaml.template | sed 's/persistentVolumeClaim:/emptyDir: {}/g' | sed '/claimName:/d' | kubectl apply -f -
|
||||||
|
|
||||||
|
# Create monitoring namespace
|
||||||
|
kubectl create namespace monitoring --dry-run=client -o yaml | kubectl apply -f -
|
||||||
|
|
||||||
|
|
||||||
|
# Apply RBAC for Prometheus
|
||||||
|
kubectl apply -f ./prometheus-rbac.yaml
|
||||||
|
|
||||||
|
# Apply monitoring resources after CRDs are installed
|
||||||
|
envsubst < ./monitoring-k8s.yaml.template | kubectl apply -f -
|
||||||
|
|
||||||
kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
|
kubectl create configmap llama-stack-config --from-file=stack_run_config.yaml \
|
||||||
--dry-run=client -o yaml > stack-configmap.yaml
|
--dry-run=client -o yaml > stack-configmap.yaml
|
||||||
|
|
||||||
|
|
|
@ -37,6 +37,14 @@ set -x
|
||||||
# Delete UI deployment
|
# Delete UI deployment
|
||||||
envsubst < ./ui-k8s.yaml.template | kubectl delete -f - --ignore-not-found=true
|
envsubst < ./ui-k8s.yaml.template | kubectl delete -f - --ignore-not-found=true
|
||||||
|
|
||||||
|
# Delete monitoring resources
|
||||||
|
envsubst < ./monitoring-k8s.yaml.template | kubectl delete -f - --ignore-not-found=true
|
||||||
|
|
||||||
|
# Delete Prometheus RBAC resources
|
||||||
|
kubectl delete -f ./prometheus-rbac.yaml --ignore-not-found=true
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Delete ingress
|
# Delete ingress
|
||||||
envsubst < ./ingress-k8s.yaml.template | kubectl delete -f - --ignore-not-found=true
|
envsubst < ./ingress-k8s.yaml.template | kubectl delete -f - --ignore-not-found=true
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,11 @@ spec:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app: llm-nim-code
|
app: llm-nim-code
|
||||||
|
nim-type: llama-nim
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
|
prometheus.io/port: '8000'
|
||||||
|
prometheus.io/path: '/v1/metrics'
|
||||||
spec:
|
spec:
|
||||||
imagePullSecrets:
|
imagePullSecrets:
|
||||||
- name: ngc-docker-registry # docker-registry secret: nvcr.io / $oauthtoken / <NGC_DOCKER_API_KEY>
|
- name: ngc-docker-registry # docker-registry secret: nvcr.io / $oauthtoken / <NGC_DOCKER_API_KEY>
|
||||||
|
@ -42,6 +47,12 @@ spec:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: ngc-api
|
name: ngc-api
|
||||||
key: NGC_API_KEY
|
key: NGC_API_KEY
|
||||||
|
- name: NVIDIA_VISIBLE_DEVICES
|
||||||
|
value: "all"
|
||||||
|
- name: NVIDIA_DRIVER_CAPABILITIES
|
||||||
|
value: "compute,utility"
|
||||||
|
- name: ENABLE_GPU_METRICS
|
||||||
|
value: "true"
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: model-cache
|
- name: model-cache
|
||||||
mountPath: /models # default NIM cache path
|
mountPath: /models # default NIM cache path
|
||||||
|
|
2334
docs/source/distributions/k8s/monitoring-k8s.yaml.template
Normal file
2334
docs/source/distributions/k8s/monitoring-k8s.yaml.template
Normal file
File diff suppressed because it is too large
Load diff
41
docs/source/distributions/k8s/prometheus-rbac.yaml
Normal file
41
docs/source/distributions/k8s/prometheus-rbac.yaml
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
---
# ServiceAccount named "prometheus" in the "monitoring" namespace.
# NOTE(review): presumably referenced by the Prometheus workload defined in
# monitoring-k8s.yaml.template -- confirm against that template.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: monitoring
---
# Cluster-scoped role holding read-only rules (get/list/watch, plus get on
# the /metrics non-resource URL). It contains no write verbs.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Core API group (the empty string): nodes, services, endpoints and pods.
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  # Ingress objects, listed under both API groups named here.
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  # Non-resource URL rule: allows GET on the /metrics path.
  - nonResourceURLs:
      - "/metrics"
    verbs:
      - get
---
# Binds the "prometheus" ClusterRole to the "prometheus" ServiceAccount in
# the "monitoring" namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: monitoring
|
|
@ -23,7 +23,12 @@ spec:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/name: vllm
|
app.kubernetes.io/name: vllm
|
||||||
|
app: vllm
|
||||||
workload-type: inference
|
workload-type: inference
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: 'true'
|
||||||
|
prometheus.io/port: '8001'
|
||||||
|
prometheus.io/path: '/metrics'
|
||||||
spec:
|
spec:
|
||||||
# Removed nodeSelector for GPU nodes as they don't appear to exist in the cluster
|
# Removed nodeSelector for GPU nodes as they don't appear to exist in the cluster
|
||||||
# If you have GPU nodes with a different label, you can uncomment and modify this section
|
# If you have GPU nodes with a different label, you can uncomment and modify this section
|
||||||
|
@ -45,6 +50,7 @@ spec:
|
||||||
key: token
|
key: token
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8001
|
- containerPort: 8001
|
||||||
|
name: http
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
nvidia.com/gpu: 1
|
nvidia.com/gpu: 1
|
||||||
|
@ -69,4 +75,5 @@ spec:
|
||||||
- protocol: TCP
|
- protocol: TCP
|
||||||
port: 8001
|
port: 8001
|
||||||
targetPort: 8001
|
targetPort: 8001
|
||||||
|
name: http
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue