mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 11:13:55 +00:00
Adding helm chart for deploying llama-stack
This commit is contained in:
parent
bfc79217a8
commit
1bc1f08037
14 changed files with 692 additions and 0 deletions
62
chart/templates/_helpers.tpl
Normal file
62
chart/templates/_helpers.tpl
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "llama-stack.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "llama-stack.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "llama-stack.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "llama-stack.labels" -}}
|
||||
helm.sh/chart: {{ include "llama-stack.chart" . }}
|
||||
{{ include "llama-stack.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "llama-stack.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "llama-stack.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "llama-stack.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "llama-stack.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
7
chart/templates/config.yaml
Normal file
7
chart/templates/config.yaml
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}-run-config
|
||||
data:
|
||||
run.yaml: |-
|
||||
{{- .Files.Get "files/run.yaml" | nindent 4 }}
|
||||
91
chart/templates/deployment.yaml
Normal file
91
chart/templates/deployment.yaml
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "llama-stack.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
{{- with .Values.podAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 8 }}
|
||||
{{- with .Values.podLabels }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "llama-stack.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
||||
image: "{{ (tpl .Values.image.repository $) }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ .Values.service.port }}
|
||||
protocol: TCP
|
||||
args:
|
||||
- "--yaml-config"
|
||||
- "/config/run.yaml"
|
||||
env:
|
||||
{{- with .Values.vllm }}
|
||||
- name: VLLM_URL
|
||||
value: {{ .url | quote }}
|
||||
- name: VLLM_API_TOKEN
|
||||
value: {{ .apiKey | default "" | quote}}
|
||||
- name: INFERENCE_MODEL
|
||||
value: {{ .inferenceModel | quote }}
|
||||
{{- end }}
|
||||
- name: LLAMA_STACK_PORT
|
||||
value: {{ .Values.service.port | quote }}
|
||||
{{- if .Values.telemetry.enabled }}
|
||||
- name: TELEMETRY_SINKS
|
||||
value: {{ .Values.telemetry.sinks | quote }}
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: {{ .Values.telemetry.serviceName | quote }}
|
||||
{{- end }}
|
||||
{{- with .Values.env }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
livenessProbe:
|
||||
{{- tpl (toYaml .Values.livenessProbe) $ | nindent 12 }}
|
||||
readinessProbe:
|
||||
{{- tpl (toYaml .Values.readinessProbe) $ | nindent 12 }}
|
||||
startupProbe:
|
||||
{{- tpl (toYaml .Values.startupProbe) $ | nindent 12 }}
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /config
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: {{ include "llama-stack.fullname" . }}-run-config
|
||||
defaultMode: 0755
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
32
chart/templates/hpa.yaml
Normal file
32
chart/templates/hpa.yaml
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
{{- if .Values.autoscaling.enabled }}
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
minReplicas: {{ .Values.autoscaling.minReplicas }}
|
||||
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
|
||||
metrics:
|
||||
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
43
chart/templates/ingress.yaml
Normal file
43
chart/templates/ingress.yaml
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
{{- if .Values.ingress.enabled -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.ingress.className }}
|
||||
ingressClassName: {{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
{{- with .pathType }}
|
||||
pathType: {{ . }}
|
||||
{{- end }}
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "llama-stack.fullname" $ }}
|
||||
port:
|
||||
number: {{ $.Values.service.port }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
31
chart/templates/openshift/route.yaml
Normal file
31
chart/templates/openshift/route.yaml
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{{- if .Values.route.enabled -}}
|
||||
kind: Route
|
||||
apiVersion: route.openshift.io/v1
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
{{- with .Values.route.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.route.host }}
|
||||
host: {{ .Values.route.host }}
|
||||
{{- end }}
|
||||
{{- if .Values.route.path }}
|
||||
path: {{ .Values.route.path }}
|
||||
{{- end }}
|
||||
to:
|
||||
kind: Service
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
weight: 100
|
||||
port:
|
||||
targetPort: llama-stack
|
||||
{{- if .Values.route.tls.enabled }}
|
||||
tls:
|
||||
termination: {{ .Values.route.tls.termination }}
|
||||
insecureEdgeTerminationPolicy: {{ .Values.route.tls.insecureEdgeTerminationPolicy }}
|
||||
{{- end }}
|
||||
wildcardPolicy: None
|
||||
{{- end }}
|
||||
15
chart/templates/service.yaml
Normal file
15
chart/templates/service.yaml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: llama-stack
|
||||
selector:
|
||||
{{- include "llama-stack.selectorLabels" . | nindent 4 }}
|
||||
13
chart/templates/serviceaccount.yaml
Normal file
13
chart/templates/serviceaccount.yaml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
{{- if .Values.serviceAccount.create -}}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "llama-stack.serviceAccountName" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
|
||||
{{- end }}
|
||||
Loading…
Add table
Add a link
Reference in a new issue