# llama-stack-mirror/charts/llama-stack/values.yaml
# 2025-03-18 19:45:59 -04:00

# 152 lines
# 4.8 KiB
# YAML

# When set to true, use the custom `run.yaml` file located at `files/run.yaml`.
customRunConfig: false
# Settings for the vLLM server.
# TODO: Only vLLM is supported for now; this should be expanded in the future.
vllm:
  # URL of the vLLM server
  url: "http://vllm-server"
  # Model used for inference
  inferenceModel: "llama2-7b-chat"
  # API key for the vLLM server. It can be set in one of two ways.
  #
  # Through a secret:
  # TODO: Implement this
  # secret:
  #   name: vllm-secret
  #   key: vll
  #
  # Or directly with an API key (should be avoided in production):
  # apiKey: "xxxxxxxxxxxx"
# Llama Stack distribution to deploy. For the available distributions, see:
# https://llama-stack.readthedocs.io/en/latest/distributions/selection.html
# Note: this value is only used if `image.repository` is not overridden.
distribution: distribution-remote-vllm
# Telemetry settings
telemetry:
  # Telemetry is disabled by default
  enabled: false
  # NOTE(review): despite the key name, the default looks like a collector
  # host:port endpoint rather than a service name — confirm how the templates use it.
  serviceName: "otel-collector.openshift-opentelemetry-operator.svc.cluster.local:4318"
  # Comma-separated list of telemetry sinks
  sinks: "console,sqlite,otel"
# Use this to pass additional environment variables to the container
# env:
#   MY_CUSTOM_ENV_VAR: "my-custom-env-var-value"
# Number of replicas to run.
# NOTE(review): presumably ignored when `autoscaling.enabled` is true — confirm against the deployment template.
replicaCount: 1
# Container image settings. More information can be found here:
# https://kubernetes.io/docs/concepts/containers/images/
image:
  # The default is a template string that references the top-level
  # `distribution` value. It is quoted so that YAML always reads it as a plain
  # string, whatever the `{{ ... }}` expression contains.
  # NOTE(review): presumably rendered with `tpl` by the chart templates — confirm.
  repository: "docker.io/llamastack/{{ $.Values.distribution }}"
  # tag: latest
  # Pull policy for the image
  pullPolicy: Always
# Service account settings. More information can be found here:
# https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Whether a service account should be created
  create: false
  # Whether to automatically mount the ServiceAccount's API credentials
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # Name of the service account to use.
  # If left empty while `create` is true, a name is generated using the fullname template.
  name: ""
# Kubernetes annotations to add to the Pod.
# For more information, see: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
podAnnotations: {}
# Kubernetes labels to add to the Pod.
# For more information, see: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
podLabels: {}
# Pod-level security context; empty by default. Example setting shown below.
podSecurityContext: {}
  # fsGroup: 2000
# Service settings. More information can be found here:
# https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # Service type; see
  # https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: ClusterIP
  # Service port; see
  # https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  port: 5001
# Additional volumes on the output Deployment definition.
# Empty by default; a commented-out example entry follows.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false
# Additional volumeMounts on the output Deployment definition.
# Empty by default; a commented-out example entry follows.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true
# Ingress settings. More information can be found here:
# https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
  # NOTE(review): enabled by default with the example host below — confirm this is intended.
  enabled: true
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # No TLS entries by default; a commented-out example follows.
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
# -- Enable creation of the OpenShift Route object (use this instead of ingress on OpenShift)
route:
  enabled: false
  # Leave blank to let OpenShift (OCP) determine the host
  # -- The hostname for the route
  # @default -- Set by OpenShift
  host: ""
  # -- The path for the OpenShift route
  path: ""
  tls:
    # -- Enable secure route settings
    enabled: true
    # -- Secure route termination policy
    termination: edge
    # -- Insecure route termination policy
    insecureEdgeTerminationPolicy: Redirect
  # -- Additional custom annotations for the route
  annotations: {}
# Container resource requests and limits (set to the same values by default)
resources:
  requests:
    cpu: 100m
    memory: 500Mi
  limits:
    cpu: 100m
    memory: 500Mi
# Liveness, readiness and startup probe settings. More information can be found here:
# https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
livenessProbe:
  httpGet:
    path: /v1/health
    # Same value as the default `service.port`
    port: 5001
# Readiness probe: HTTP GET against the health endpoint
readinessProbe:
  httpGet:
    path: /v1/health
    # Same value as the default `service.port`
    port: 5001
# Startup probe: HTTP GET against the health endpoint
startupProbe:
  httpGet:
    path: /v1/health
    # Same value as the default `service.port`
    port: 5001
  # Wait 40 s before the first check, then check every 10 s,
  # tolerating up to 30 consecutive failures.
  initialDelaySeconds: 40
  periodSeconds: 10
  failureThreshold: 30
# Autoscaling settings. More information can be found here:
# https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
  # Autoscaling is disabled by default
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80