# Mirror of https://github.com/meta-llama/llama-stack.git
# Synced 2025-10-23 16:37:28 +00:00
---
# PersistentVolumeClaim backing the Ollama model cache for the safety
# (llama-guard) deployment below. 10Gi is requested for pulled model blobs.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ollama-models-safety
spec:
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  resources:
    requests:
      storage: 10Gi
---
# Deployment running a single Ollama replica that serves the
# llama-guard3:1b safety model. The container starts `ollama serve`,
# pulls the model, warms it with `ollama run`, then waits.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ollama-safety
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: ollama-safety
  template:
    metadata:
      labels:
        app.kubernetes.io/name: ollama-safety
        workload-type: inference
      annotations:
        # Prometheus scrape config for the Ollama metrics endpoint.
        prometheus.io/scrape: "true"
        prometheus.io/port: "11434"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: ollama-safety
          # NOTE(review): `latest` is unpinned — consider a fixed tag for
          # reproducible rollouts.
          image: ollama/ollama:latest
          command: ["/bin/sh", "-c"]
          args:
            # Serve in the background, pull + warm the guard model, then
            # wait on the server process.
            - "ollama serve & sleep 5 && ollama pull llama-guard3:1b && ollama run llama-guard3:1b & wait"
          env:
            # Bind on all interfaces so the Service can reach the pod.
            - name: OLLAMA_HOST
              value: "0.0.0.0"
            - name: OLLAMA_PORT
              value: "11434"
            - name: OLLAMA_ENABLE_METRICS
              value: "true"
          ports:
            - containerPort: 11434
          resources:
            requests:
              memory: "8Gi"
              cpu: "6000m"
            limits:
              memory: "16Gi"
              cpu: "6000m"
          volumeMounts:
            # Model cache — must match the PVC-backed volume below.
            - name: ollama-storage
              mountPath: /root/.ollama
      volumes:
        - name: ollama-storage
          persistentVolumeClaim:
            claimName: ollama-models-safety
---
# ClusterIP Service exposing the Ollama safety pod on its API port (11434)
# to other workloads inside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: ollama-server-safety
spec:
  selector:
    app.kubernetes.io/name: ollama-safety
  ports:
    - protocol: TCP
      port: 11434
      targetPort: 11434
  type: ClusterIP