# llama-stack-mirror/charts/llama-stack/values.yaml


# When set to true, use the custom `run.yaml` file located at `files/run.yaml`.
customRunConfig: false

# TODO: Currently only vLLM is supported; this should be expanded in the
# future.
vllm:
  # Base URL of the vLLM server.
  url: "http://vllm-server"
  # Name of the model to use for inference.
  # NOTE(review): presumably must match a model served by the vLLM server
  # above — verify.
  inferenceModel: "llama2-7b-chat"
  # API key for the vLLM server. It can be set in two ways, either through a
  # secret:
  # TODO: Implement this
  # secret:
  #   name: vllm-secret
  #   key: vll  # NOTE(review): key name looks truncated — confirm
  # or directly with an API key (should be avoided in production):
  # apiKey: "xxxxxxxxxxxx"

# Llama Stack distribution to run. See:
# https://llama-stack.readthedocs.io/en/latest/distributions/selection.html
# Note: this is only used if `image.repository` is not overridden, because the
# default repository value interpolates it.
distribution: distribution-remote-vllm

# Telemetry settings (disabled by default).
telemetry:
  enabled: false
  # NOTE(review): despite the key name, the default looks like the host:port
  # of an OpenTelemetry collector — confirm how the templates consume it.
  serviceName: "otel-collector.openshift-opentelemetry-operator.svc.cluster.local:4318"
  # Comma-separated list of telemetry sinks.
  sinks: "console,sqlite,otel"

# Additional environment variables to pass to the container.
# env:
#   MY_CUSTOM_ENV_VAR: "my-custom-env-var-value"

# Number of replicas to run.
# NOTE(review): presumably ignored when `autoscaling.enabled` is true — verify
# against the Deployment template.
replicaCount: 1

# Container image settings.
# More information: https://kubernetes.io/docs/concepts/containers/images/
image:
  # The default interpolates the top-level `distribution` value.
  # NOTE(review): Helm does not template values.yaml on its own, so this
  # relies on the chart rendering the string (e.g. via `tpl`) — confirm in the
  # templates.
  repository: "docker.io/llamastack/{{ $.Values.distribution }}"
  # tag: latest
  # Pull policy for the image.
  pullPolicy: Always


# Service account settings.
# More information: https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created.
  create: false
  # Automatically mount the ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account.
  annotations: {}
  # The name of the service account to use.
  # If not set and `create` is true, a name is generated using the fullname
  # template.
  name: ""

# Kubernetes annotations to set on the Pod.
# More information: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
podAnnotations: {}
# Kubernetes labels to set on the Pod.
# More information: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
podLabels: {}

# Pod-level security context settings (empty by default). Example:
podSecurityContext: {}
  # fsGroup: 2000

# Service settings.
# More information: https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # Service type. More information:
  # https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: ClusterIP
  # Service port. More information:
  # https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  # The liveness, readiness and startup probes in this file hard-code the same
  # port number (5001); keep them in sync when changing this value.
  port: 5001

# Additional volumes to add to the output Deployment definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false

# Additional volumeMounts to add to the output Deployment definition.
# In the examples, the mount's `name` (foo) refers to the volume of the same
# name defined under `volumes`.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true


# Ingress settings.
# More information: https://kubernetes.io/docs/concepts/services-networking/ingress/
# NOTE(review): ingress is enabled by default while the only configured host is
# the placeholder `chart-example.local` — confirm this default is intentional.
# On OpenShift, use `route` instead.
ingress:
  enabled: true
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  # Hosts and paths to route; the default host is a placeholder.
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # TLS configuration (none by default). Example:
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local


# -- Enable creation of the OpenShift Route object (this should be used instead of ingress on OpenShift)
route:
  enabled: false
  # OpenShift determines the host if this is left blank.
  # -- The hostname for the route
  # @default -- Set by OpenShift
  host: ""
  # -- The path for the OpenShift route
  path: ""
  tls:
    # -- Enable secure route settings
    enabled: true
    # -- Secure route termination policy
    termination: edge
    # -- Policy for insecure (non-TLS) traffic to the route
    insecureEdgeTerminationPolicy: Redirect
  # -- Additional custom annotations for the route
  annotations: {}


# CPU and memory requests/limits. Requests and limits are set to the same
# values by default.
resources:
  limits:
    cpu: 100m
    memory: 500Mi
  requests:
    cpu: 100m
    memory: 500Mi

# Liveness, readiness and startup probes.
# More information: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
# All three probes issue an HTTP GET to `/v1/health` on port 5001, the same
# number as the default `service.port`; update them together.
livenessProbe:
  httpGet:
    path: /v1/health
    port: 5001
readinessProbe:
  httpGet:
    path: /v1/health
    port: 5001
# The startup probe waits 40s before the first check, then checks every 10s and
# tolerates up to 30 failures, i.e. roughly 40s + 30 x 10s = 340s to start up.
startupProbe:
  httpGet:
    path: /v1/health
    port: 5001
  initialDelaySeconds: 40
  periodSeconds: 10
  failureThreshold: 30

# Autoscaling settings (disabled by default).
# More information: https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
  enabled: false
  # Lower and upper bounds for the number of replicas.
  minReplicas: 1
  maxReplicas: 100
  # Target average CPU utilization, in percent.
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80