mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-06 10:42:39 +00:00
Adding helm chart for deploying llama-stack
This commit is contained in:
parent
bfc79217a8
commit
1bc1f08037
14 changed files with 692 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -23,3 +23,4 @@ venv/
|
|||
pytest-report.xml
|
||||
.coverage
|
||||
.python-version
|
||||
**/local-*
|
23
chart/.helmignore
Normal file
23
chart/.helmignore
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
24
chart/Chart.yaml
Normal file
24
chart/Chart.yaml
Normal file
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: llama-stack
|
||||
description: Basic chart for deploying llama-stack
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "0.1.6"
|
85
chart/README.md
Normal file
85
chart/README.md
Normal file
|
@ -0,0 +1,85 @@
|
|||
|
||||
# Llama Stack Helm Chart
|
||||
|
||||
This Helm chart is designed to install the Llama Stack, a comprehensive platform for llama-related tasks.
|
||||
|
||||
The chart provides a convenient way to deploy and manage the Llama Stack on Kubernetes or OpenShift clusters. It offers flexibility in customizing the deployment by allowing users to modify values such as image repositories, probe configurations, resource limits, and more.
|
||||
|
||||
Optionally, the chart also supports the installation of the llama-stack-playground, which provides a web-based interface for interacting with the Llama Stack.
|
||||
|
||||
## Quick Start
|
||||
|
||||
Create a `local-values.yaml` file with the following:
|
||||
|
||||
> **Note**
|
||||
> The chart currently supports only the `vllm` framework out of the box. Other distributions can be used by overriding the `env` values directly.
|
||||
|
||||
```yaml
|
||||
vllm:
|
||||
url: "https://<MY_VLLM_INSTANCE>:443/v1"
|
||||
inferenceModel: "meta-llama/Llama-3.1-8B-Instruct"
|
||||
apiKey: xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||
```
|
||||
|
||||
Log in to your Kubernetes cluster through the CLI and run:
|
||||
|
||||
```sh
|
||||
helm upgrade -i llama-stack . -f local-values.yaml
|
||||
```
|
||||
|
||||
## Values
|
||||
|
||||
| Key | Type | Default | Description |
|
||||
|-----|------|---------|-------------|
|
||||
| autoscaling.enabled | bool | `false` | |
|
||||
| autoscaling.maxReplicas | int | `100` | |
|
||||
| autoscaling.minReplicas | int | `1` | |
|
||||
| autoscaling.targetCPUUtilizationPercentage | int | `80` | |
|
||||
| distribution | string | `"distribution-remote-vllm"` | |
|
||||
| image.pullPolicy | string | `"Always"` | |
|
||||
| image.repository | string | `"docker.io/llamastack/{{ $.Values.distribution }}"` | |
|
||||
| image.tag | string | `"0.1.6"` | |
|
||||
| ingress.annotations | object | `{}` | |
|
||||
| ingress.className | string | `""` | |
|
||||
| ingress.enabled | bool | `true` | |
|
||||
| ingress.hosts[0].host | string | `"chart-example.local"` | |
|
||||
| ingress.hosts[0].paths[0].path | string | `"/"` | |
|
||||
| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` | |
|
||||
| ingress.tls | list | `[]` | |
|
||||
| livenessProbe.httpGet.path | string | `"/v1/health"` | |
|
||||
| livenessProbe.httpGet.port | int | `5001` | |
|
||||
| podAnnotations | object | `{}` | |
|
||||
| podLabels | object | `{}` | |
|
||||
| podSecurityContext | object | `{}` | |
|
||||
| readinessProbe.httpGet.path | string | `"/v1/health"` | |
|
||||
| readinessProbe.httpGet.port | int | `5001` | |
|
||||
| replicaCount | int | `1` | |
|
||||
| resources.limits.cpu | string | `"100m"` | |
|
||||
| resources.limits.memory | string | `"500Mi"` | |
|
||||
| resources.requests.cpu | string | `"100m"` | |
|
||||
| resources.requests.memory | string | `"500Mi"` | |
|
||||
| route | object | `{"annotations":{},"enabled":false,"host":"","path":"","tls":{"enabled":true,"insecureEdgeTerminationPolicy":"Redirect","termination":"edge"}}` | Enable creation of the OpenShift Route object (This should be used instead of ingress on OpenShift) |
|
||||
| route.annotations | object | `{}` | Additional custom annotations for the route |
|
||||
| route.host | string | Set by OpenShift | The hostname for the route |
|
||||
| route.path | string | `""` | The path for the OpenShift route |
|
||||
| route.tls.enabled | bool | `true` | Enable secure route settings |
|
||||
| route.tls.insecureEdgeTerminationPolicy | string | `"Redirect"` | Insecure route termination policy |
|
||||
| route.tls.termination | string | `"edge"` | Secure route termination policy |
|
||||
| runConfig.enabled | bool | `false` | |
|
||||
| service.port | int | `5001` | |
|
||||
| service.type | string | `"ClusterIP"` | |
|
||||
| serviceAccount.annotations | object | `{}` | |
|
||||
| serviceAccount.automount | bool | `true` | |
|
||||
| serviceAccount.create | bool | `false` | |
|
||||
| serviceAccount.name | string | `""` | |
|
||||
| startupProbe.failureThreshold | int | `30` | |
|
||||
| startupProbe.httpGet.path | string | `"/v1/health"` | |
|
||||
| startupProbe.httpGet.port | int | `5001` | |
|
||||
| startupProbe.initialDelaySeconds | int | `40` | |
|
||||
| startupProbe.periodSeconds | int | `10` | |
|
||||
| telemetry.enabled | bool | `false` | |
|
||||
| telemetry.serviceName | string | `"otel-collector.openshift-opentelemetry-operator.svc.cluster.local:4318"` | |
|
||||
| telemetry.sinks | string | `"console,sqlite,otel"` | |
|
||||
| vllm.inferenceModel | string | `"llama2-7b-chat"` | |
|
||||
| vllm.url | string | `"http://vllm-server"` | |
|
||||
| yamlConfig | string | `"/config/run.yaml"` | |
|
123
chart/files/run.yaml
Executable file
123
chart/files/run.yaml
Executable file
|
@ -0,0 +1,123 @@
|
|||
version: '2'
|
||||
image_name: vllm-gpu
|
||||
apis:
|
||||
- agents
|
||||
- datasetio
|
||||
- eval
|
||||
- inference
|
||||
- safety
|
||||
- scoring
|
||||
- telemetry
|
||||
- tool_runtime
|
||||
- vector_io
|
||||
providers:
|
||||
inference:
|
||||
- provider_id: vllm
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:http://localhost:8000}
|
||||
api_token: ${env.VLLM_API_TOKEN}
|
||||
model: ${env.INFERENCE_MODEL:Llama3.2-3B-Instruct}
|
||||
tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:1}
|
||||
max_tokens: ${env.MAX_TOKENS:4096}
|
||||
enforce_eager: ${env.ENFORCE_EAGER:False}
|
||||
gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:0.7}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
config: {}
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
config:
|
||||
kvstore:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/faiss_store.db
|
||||
safety:
|
||||
- provider_id: llama-guard
|
||||
provider_type: inline::llama-guard
|
||||
config: {}
|
||||
agents:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
persistence_store:
|
||||
type: sqlite
|
||||
namespace: null
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/agents_store.db
|
||||
telemetry:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config:
|
||||
service_name: ${env.OTEL_SERVICE_NAME:llama-stack}
|
||||
sinks: ${env.TELEMETRY_SINKS:console,sqlite}
|
||||
sqlite_db_path: ${env.SQLITE_DB_PATH:~/.llama/distributions/vllm-gpu/trace_store.db}
|
||||
eval:
|
||||
- provider_id: meta-reference
|
||||
provider_type: inline::meta-reference
|
||||
config: {}
|
||||
datasetio:
|
||||
- provider_id: huggingface
|
||||
provider_type: remote::huggingface
|
||||
config: {}
|
||||
- provider_id: localfs
|
||||
provider_type: inline::localfs
|
||||
config: {}
|
||||
scoring:
|
||||
- provider_id: basic
|
||||
provider_type: inline::basic
|
||||
config: {}
|
||||
- provider_id: llm-as-judge
|
||||
provider_type: inline::llm-as-judge
|
||||
config: {}
|
||||
- provider_id: braintrust
|
||||
provider_type: inline::braintrust
|
||||
config:
|
||||
openai_api_key: ${env.OPENAI_API_KEY:}
|
||||
tool_runtime:
|
||||
- provider_id: brave-search
|
||||
provider_type: remote::brave-search
|
||||
config:
|
||||
api_key: ${env.BRAVE_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: tavily-search
|
||||
provider_type: remote::tavily-search
|
||||
config:
|
||||
api_key: ${env.TAVILY_SEARCH_API_KEY:}
|
||||
max_results: 3
|
||||
- provider_id: code-interpreter
|
||||
provider_type: inline::code-interpreter
|
||||
config: {}
|
||||
- provider_id: rag-runtime
|
||||
provider_type: inline::rag-runtime
|
||||
config: {}
|
||||
- provider_id: model-context-protocol
|
||||
provider_type: remote::model-context-protocol
|
||||
config: {}
|
||||
metadata_store:
|
||||
type: sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/vllm-gpu}/registry.db
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
provider_id: vllm
|
||||
model_type: llm
|
||||
- metadata:
|
||||
embedding_dimension: 384
|
||||
model_id: all-MiniLM-L6-v2
|
||||
provider_id: sentence-transformers
|
||||
model_type: embedding
|
||||
shields: []
|
||||
vector_dbs: []
|
||||
datasets: []
|
||||
scoring_fns: []
|
||||
benchmarks: []
|
||||
tool_groups:
|
||||
- toolgroup_id: builtin::websearch
|
||||
provider_id: brave-search
|
||||
- toolgroup_id: builtin::rag
|
||||
provider_id: rag-runtime
|
||||
- toolgroup_id: builtin::code_interpreter
|
||||
provider_id: code-interpreter
|
||||
server:
|
||||
port: 8321
|
62
chart/templates/_helpers.tpl
Normal file
62
chart/templates/_helpers.tpl
Normal file
|
@ -0,0 +1,62 @@
|
|||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "llama-stack.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "llama-stack.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "llama-stack.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "llama-stack.labels" -}}
|
||||
helm.sh/chart: {{ include "llama-stack.chart" . }}
|
||||
{{ include "llama-stack.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "llama-stack.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "llama-stack.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "llama-stack.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "llama-stack.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
7
chart/templates/config.yaml
Normal file
7
chart/templates/config.yaml
Normal file
|
@ -0,0 +1,7 @@
|
|||
{{- /*
ConfigMap that carries the llama-stack run configuration as a single
`run.yaml` key.

Content selection:
  * runConfig.enabled is true AND runConfig.customYaml is set -> the custom
    configuration from values is used (either a pasted multi-line string or a
    structured YAML mapping).
  * otherwise -> the chart's bundled files/run.yaml is used, which is the
    behaviour with the default values.

NOTE(review): `runConfig.enabled` is interpreted here as "use customYaml";
confirm this matches the intended meaning of the flag.
*/ -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "llama-stack.fullname" . }}-run-config
  labels:
    {{- include "llama-stack.labels" . | nindent 4 }}
data:
  run.yaml: |-
    {{- /* Tolerate a values file that drops the runConfig section entirely. */}}
    {{- $runConfig := .Values.runConfig | default dict }}
    {{- if and $runConfig.enabled $runConfig.customYaml }}
    {{- if kindIs "string" $runConfig.customYaml }}
    {{- $runConfig.customYaml | nindent 4 }}
    {{- else }}
    {{- toYaml $runConfig.customYaml | nindent 4 }}
    {{- end }}
    {{- else }}
    {{- .Files.Get "files/run.yaml" | nindent 4 }}
    {{- end }}
|
91
chart/templates/deployment.yaml
Normal file
91
chart/templates/deployment.yaml
Normal file
|
@ -0,0 +1,91 @@
|
|||
{{- /*
Deployment for the llama-stack server.

The container is started with `--yaml-config /config/run.yaml`; that file is
provided by the `<fullname>-run-config` ConfigMap mounted at /config.
Connection details for the vLLM backend and optional telemetry settings are
passed in as environment variables.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "llama-stack.fullname" . }}
  labels:
    {{- include "llama-stack.labels" . | nindent 4 }}
spec:
  {{- /* When the HPA is enabled it owns the replica count; rendering
         `replicas` here would reset it on every `helm upgrade`. */}}
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "llama-stack.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "llama-stack.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "llama-stack.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      containers:
        - name: {{ .Chart.Name }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          {{- /* image.repository may itself contain template expressions
                 (e.g. the distribution name), hence the `tpl` call. */}}
          image: "{{ (tpl .Values.image.repository $) }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          args:
            - "--yaml-config"
            - "/config/run.yaml"
          env:
            {{- with .Values.vllm }}
            - name: VLLM_URL
              value: {{ .url | quote }}
            - name: VLLM_API_TOKEN
              value: {{ .apiKey | default "" | quote }}
            - name: INFERENCE_MODEL
              value: {{ .inferenceModel | quote }}
            {{- end }}
            - name: LLAMA_STACK_PORT
              value: {{ .Values.service.port | quote }}
            {{- if .Values.telemetry.enabled }}
            - name: TELEMETRY_SINKS
              value: {{ .Values.telemetry.sinks | quote }}
            {{- /* NOTE(review): the default telemetry.serviceName looks like a
                   collector endpoint rather than a service name - confirm that
                   OTEL_SERVICE_NAME is the intended variable. */}}
            - name: OTEL_SERVICE_NAME
              value: {{ .Values.telemetry.serviceName | quote }}
            {{- end }}
            {{- /* Extra environment variables. Accept both the map form shown
                   in values.yaml (NAME: "value") and a native Kubernetes
                   EnvVar list (- name: ... / value: ...). */}}
            {{- with .Values.env }}
            {{- if kindIs "map" . }}
            {{- range $name, $value := . }}
            - name: {{ $name }}
              value: {{ $value | quote }}
            {{- end }}
            {{- else }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
            {{- end }}
          livenessProbe:
            {{- tpl (toYaml .Values.livenessProbe) $ | nindent 12 }}
          readinessProbe:
            {{- tpl (toYaml .Values.readinessProbe) $ | nindent 12 }}
          startupProbe:
            {{- tpl (toYaml .Values.startupProbe) $ | nindent 12 }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          volumeMounts:
            - name: config-volume
              mountPath: /config
      volumes:
        - name: config-volume
          configMap:
            name: {{ include "llama-stack.fullname" . }}-run-config
            defaultMode: 0755
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
|
32
chart/templates/hpa.yaml
Normal file
32
chart/templates/hpa.yaml
Normal file
|
@ -0,0 +1,32 @@
|
|||
{{- if .Values.autoscaling.enabled }}
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
minReplicas: {{ .Values.autoscaling.minReplicas }}
|
||||
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
|
||||
metrics:
|
||||
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- end }}
|
43
chart/templates/ingress.yaml
Normal file
43
chart/templates/ingress.yaml
Normal file
|
@ -0,0 +1,43 @@
|
|||
{{- if .Values.ingress.enabled -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.ingress.className }}
|
||||
ingressClassName: {{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
{{- with .pathType }}
|
||||
pathType: {{ . }}
|
||||
{{- end }}
|
||||
backend:
|
||||
service:
|
||||
name: {{ include "llama-stack.fullname" $ }}
|
||||
port:
|
||||
number: {{ $.Values.service.port }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
31
chart/templates/openshift/route.yaml
Normal file
31
chart/templates/openshift/route.yaml
Normal file
|
@ -0,0 +1,31 @@
|
|||
{{- if .Values.route.enabled -}}
|
||||
kind: Route
|
||||
apiVersion: route.openshift.io/v1
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
{{- with .Values.route.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.route.host }}
|
||||
host: {{ .Values.route.host }}
|
||||
{{- end }}
|
||||
{{- if .Values.route.path }}
|
||||
path: {{ .Values.route.path }}
|
||||
{{- end }}
|
||||
to:
|
||||
kind: Service
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
weight: 100
|
||||
port:
|
||||
targetPort: llama-stack
|
||||
{{- if .Values.route.tls.enabled }}
|
||||
tls:
|
||||
termination: {{ .Values.route.tls.termination }}
|
||||
insecureEdgeTerminationPolicy: {{ .Values.route.tls.insecureEdgeTerminationPolicy }}
|
||||
{{- end }}
|
||||
wildcardPolicy: None
|
||||
{{- end }}
|
15
chart/templates/service.yaml
Normal file
15
chart/templates/service.yaml
Normal file
|
@ -0,0 +1,15 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "llama-stack.fullname" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: llama-stack
|
||||
selector:
|
||||
{{- include "llama-stack.selectorLabels" . | nindent 4 }}
|
13
chart/templates/serviceaccount.yaml
Normal file
13
chart/templates/serviceaccount.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
{{- if .Values.serviceAccount.create -}}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "llama-stack.serviceAccountName" . }}
|
||||
labels:
|
||||
{{- include "llama-stack.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
|
||||
{{- end }}
|
142
chart/values.yaml
Normal file
142
chart/values.yaml
Normal file
|
@ -0,0 +1,142 @@
|
|||
# yamlConfig: "/config/run.yaml"
|
||||
|
||||
# TODO: Only vLLM is currently supported; this should be expanded in the future.
|
||||
vllm:
|
||||
url: "http://vllm-server"
|
||||
inferenceModel: "llama2-7b-chat"
|
||||
# This is the API key for the vLLM server. It can be set in two ways: through a secret
|
||||
# TODO: Implement this
|
||||
# secret:
|
||||
# name: vllm-secret
|
||||
# key: vll
|
||||
# or directly with an api key (should be avoided in production)
|
||||
# apiKey: "xxxxxxxxxxxx"
|
||||
|
||||
# https://llama-stack.readthedocs.io/en/latest/distributions/selection.html
|
||||
distribution: distribution-remote-vllm
|
||||
|
||||
runConfig:
|
||||
enabled: false
|
||||
# customYaml:
|
||||
# Your custom run.yaml configuration file can be pasted here
|
||||
# If not set, the default run.yaml file in the `files/run.yaml` will be used
|
||||
|
||||
telemetry:
|
||||
enabled: false
|
||||
serviceName: "otel-collector.openshift-opentelemetry-operator.svc.cluster.local:4318"
|
||||
sinks: "console,sqlite,otel"
|
||||
|
||||
# Use to allow for other env variables to be passed to the container
|
||||
# env:
|
||||
# MY_CUSTOM_ENV_VAR: "my-custom-env-var-value"
|
||||
|
||||
replicaCount: 1
|
||||
|
||||
# This sets the container image. More information can be found here: https://kubernetes.io/docs/concepts/containers/images/
|
||||
image:
|
||||
repository: docker.io/llamastack/{{ $.Values.distribution }}
|
||||
tag: 0.1.6
|
||||
# This sets the pull policy for images.
|
||||
pullPolicy: Always
|
||||
|
||||
|
||||
# This section builds out the service account. More information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created
|
||||
create: false
|
||||
# Automatically mount a ServiceAccount's API credentials?
|
||||
automount: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name: ""
|
||||
|
||||
# This is for setting Kubernetes Annotations to a Pod.
|
||||
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
|
||||
podAnnotations: {}
|
||||
# This is for setting Kubernetes Labels to a Pod.
|
||||
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
|
||||
podLabels: {}
|
||||
|
||||
podSecurityContext: {}
|
||||
# fsGroup: 2000
|
||||
|
||||
# This is for setting up a service. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/
|
||||
service:
|
||||
# This sets the service type. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
|
||||
type: ClusterIP
|
||||
# This sets the ports. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
|
||||
port: 5001
|
||||
|
||||
|
||||
# This block is for setting up the ingress. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
|
||||
ingress:
|
||||
enabled: true
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
hosts:
|
||||
- host: chart-example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
# - secretName: chart-example-tls
|
||||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
|
||||
# -- Enable creation of the OpenShift Route object (This should be used instead of ingress on OpenShift)
|
||||
route:
|
||||
enabled: false
|
||||
# Allow OCP to determine the host if left blank
|
||||
# -- The hostname for the route
|
||||
# @default -- Set by OpenShift
|
||||
host: ""
|
||||
# -- The path for the OpenShift route
|
||||
path: ""
|
||||
tls:
|
||||
# -- Enable secure route settings
|
||||
enabled: true
|
||||
# -- Secure route termination policy
|
||||
termination: edge
|
||||
# -- Insecure route termination policy
|
||||
insecureEdgeTerminationPolicy: Redirect
|
||||
# -- Additional custom annotations for the route
|
||||
annotations: {}
|
||||
|
||||
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 500Mi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 500Mi
|
||||
|
||||
# This sets up the liveness, readiness, and startup probes. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /v1/health
|
||||
port: 5001
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /v1/health
|
||||
port: 5001
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /v1/health
|
||||
port: 5001
|
||||
initialDelaySeconds: 40
|
||||
periodSeconds: 10
|
||||
failureThreshold: 30
|
||||
|
||||
# This section is for setting up autoscaling. More information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/
|
||||
autoscaling:
|
||||
enabled: false
|
||||
minReplicas: 1
|
||||
maxReplicas: 100
|
||||
targetCPUUtilizationPercentage: 80
|
||||
# targetMemoryUtilizationPercentage: 80
|
Loading…
Add table
Add a link
Reference in a new issue