Adding helm chart for deploying llama-stack

Jamie Land 2025-03-18 11:32:40 -04:00
parent bfc79217a8
commit 1bc1f08037
14 changed files with 692 additions and 0 deletions

@@ -0,0 +1,62 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "llama-stack.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "llama-stack.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "llama-stack.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
{{/*
Common labels
*/}}
{{- define "llama-stack.labels" -}}
helm.sh/chart: {{ include "llama-stack.chart" . }}
{{ include "llama-stack.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
*/}}
{{- define "llama-stack.selectorLabels" -}}
app.kubernetes.io/name: {{ include "llama-stack.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use
*/}}
{{- define "llama-stack.serviceAccountName" -}}
{{- if .Values.serviceAccount.create }}
{{- default (include "llama-stack.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
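These helpers are driven by a pair of naming values. A hypothetical values.yaml excerpt (illustrative only, not part of this commit), assuming the chart itself is named llama-stack:

nameOverride: ""        # when set, replaces .Chart.Name in resource names
fullnameOverride: ""    # when set, bypasses the <release>-<chart> naming logic entirely

With both left empty, helm install my-llama ./chart produces resources named my-llama-llama-stack; if the release name already contains the chart name (for example llama-stack-dev), the release name alone is used, truncated to 63 characters.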

@@ -0,0 +1,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "llama-stack.fullname" . }}-run-config
data:
run.yaml: |-
{{- .Files.Get "files/run.yaml" | nindent 4 }}
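The ConfigMap embeds chart/files/run.yaml verbatim. As a rough, hypothetical sketch (not part of this commit, and dependent on the llama-stack version in use), a remote-vLLM run.yaml would typically rely on llama-stack's ${env.VAR} substitution so that the VLLM_URL, VLLM_API_TOKEN and INFERENCE_MODEL variables injected by the Deployment below reach the provider configuration:

# Hypothetical excerpt of files/run.yaml; check the schema against your llama-stack release
version: '2'
apis:
- inference
providers:
  inference:
  - provider_id: vllm-inference
    provider_type: remote::vllm
    config:
      url: ${env.VLLM_URL}
      api_token: ${env.VLLM_API_TOKEN}
models:
- model_id: ${env.INFERENCE_MODEL}
  provider_id: vllm-inference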

@@ -0,0 +1,91 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "llama-stack.fullname" . }}
labels:
{{- include "llama-stack.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{- include "llama-stack.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "llama-stack.labels" . | nindent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "llama-stack.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ (tpl .Values.image.repository $) }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
containerPort: {{ .Values.service.port }}
protocol: TCP
args:
- "--yaml-config"
- "/config/run.yaml"
env:
{{- with .Values.vllm }}
- name: VLLM_URL
value: {{ .url | quote }}
- name: VLLM_API_TOKEN
value: {{ .apiKey | default "" | quote }}
- name: INFERENCE_MODEL
value: {{ .inferenceModel | quote }}
{{- end }}
- name: LLAMA_STACK_PORT
value: {{ .Values.service.port | quote }}
{{- if .Values.telemetry.enabled }}
- name: TELEMETRY_SINKS
value: {{ .Values.telemetry.sinks | quote }}
- name: OTEL_SERVICE_NAME
value: {{ .Values.telemetry.serviceName | quote }}
{{- end }}
{{- with .Values.env }}
{{- toYaml . | nindent 12 }}
{{- end }}
livenessProbe:
{{- tpl (toYaml .Values.livenessProbe) $ | nindent 12 }}
readinessProbe:
{{- tpl (toYaml .Values.readinessProbe) $ | nindent 12 }}
startupProbe:
{{- tpl (toYaml .Values.startupProbe) $ | nindent 12 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
volumeMounts:
- name: config-volume
mountPath: /config
volumes:
- name: config-volume
configMap:
name: {{ include "llama-stack.fullname" . }}-run-config
defaultMode: 0755
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
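For reference, a hypothetical values.yaml excerpt covering the fields this Deployment consumes; every value shown is an assumption, since values.yaml is not included in the excerpt above:

replicaCount: 1

image:
  repository: docker.io/llamastack/distribution-remote-vllm   # rendered through tpl, so it may reference other values
  tag: ""                  # empty falls back to .Chart.AppVersion
  pullPolicy: IfNotPresent

vllm:
  url: http://vllm.vllm.svc.cluster.local:8000/v1
  apiKey: ""
  inferenceModel: meta-llama/Llama-3.2-3B-Instruct

telemetry:
  enabled: false
  sinks: console
  serviceName: llama-stack

# Probes are rendered through tpl, so they can reference chart values if needed.
livenessProbe:
  httpGet:
    path: /v1/health       # assumed health endpoint; adjust to the distribution in use
    port: http
readinessProbe:
  httpGet:
    path: /v1/health
    port: http
startupProbe:
  httpGet:
    path: /v1/health
    port: http
  failureThreshold: 30
  periodSeconds: 10

env: []
resources: {}
nodeSelector: {}
tolerations: []
affinity: {}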

chart/templates/hpa.yaml

@@ -0,0 +1,32 @@
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "llama-stack.fullname" . }}
labels:
{{- include "llama-stack.labels" . | nindent 4 }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "llama-stack.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}
{{- end }}
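The HPA only renders when autoscaling is enabled, and each metric block is optional. A hypothetical values excerpt (not part of this commit):

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 3
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80   # omit to skip the memory metric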

@@ -0,0 +1,43 @@
{{- if .Values.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: {{ include "llama-stack.fullname" . }}
labels:
{{- include "llama-stack.labels" . | nindent 4 }}
{{- with .Values.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- with .Values.ingress.className }}
ingressClassName: {{ . }}
{{- end }}
{{- if .Values.ingress.tls }}
tls:
{{- range .Values.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- with .pathType }}
pathType: {{ . }}
{{- end }}
backend:
service:
name: {{ include "llama-stack.fullname" $ }}
port:
number: {{ $.Values.service.port }}
{{- end }}
{{- end }}
{{- end }}
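A hypothetical ingress block matching the fields referenced above; host, class name and secret name are placeholders:

ingress:
  enabled: false
  className: ""             # e.g. nginx
  annotations: {}
  hosts:
    - host: llama-stack.example.com
      paths:
        - path: /
          pathType: Prefix
  tls: []
  # tls:
  #   - hosts:
  #       - llama-stack.example.com
  #     secretName: llama-stack-tls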

@@ -0,0 +1,31 @@
{{- if .Values.route.enabled -}}
kind: Route
apiVersion: route.openshift.io/v1
metadata:
name: {{ include "llama-stack.fullname" . }}
labels:
{{- include "llama-stack.labels" . | nindent 4 }}
{{- with .Values.route.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if .Values.route.host }}
host: {{ .Values.route.host }}
{{- end }}
{{- if .Values.route.path }}
path: {{ .Values.route.path }}
{{- end }}
to:
kind: Service
name: {{ include "llama-stack.fullname" . }}
weight: 100
port:
targetPort: llama-stack
{{- if .Values.route.tls.enabled }}
tls:
termination: {{ .Values.route.tls.termination }}
insecureEdgeTerminationPolicy: {{ .Values.route.tls.insecureEdgeTerminationPolicy }}
{{- end }}
wildcardPolicy: None
{{- end }}
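The Route targets the Service's named port llama-stack (defined in the next file). A hypothetical route block with edge TLS, purely as an illustration:

route:
  enabled: false
  host: ""                  # leave empty to let OpenShift generate a host
  path: ""
  annotations: {}
  tls:
    enabled: true
    termination: edge
    insecureEdgeTerminationPolicy: Redirect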

@@ -0,0 +1,15 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "llama-stack.fullname" . }}
labels:
{{- include "llama-stack.labels" . | nindent 4 }}
spec:
type: {{ .Values.service.type }}
ports:
- port: {{ .Values.service.port }}
targetPort: http
protocol: TCP
name: llama-stack
selector:
{{- include "llama-stack.selectorLabels" . | nindent 4 }}
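The service port is reused throughout the chart (container port, LLAMA_STACK_PORT, Ingress backend), so it is the single place to change the listening port. A hypothetical default, assuming llama-stack's usual 8321:

service:
  type: ClusterIP
  port: 8321                # assumed llama-stack default port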

@@ -0,0 +1,13 @@
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "llama-stack.serviceAccountName" . }}
labels:
{{- include "llama-stack.labels" . | nindent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
{{- end }}
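A hypothetical serviceAccount block covering the fields used here and in the serviceAccountName helper:

serviceAccount:
  create: true
  automount: true
  annotations: {}
  name: ""                  # empty uses the generated fullname, or "default" when create is false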