# llama-stack-mirror/chart/values.yaml

# yamlConfig: "/config/run.yaml"

# TODO: Only vLLM is supported for now; this should be expanded to other
# providers in the future.
vllm:
  # URL of the vLLM server.
  url: "http://vllm-server"
  # Model to use for inference.
  inferenceModel: "llama2-7b-chat"
  # API key for the vLLM server. It can be set in one of two ways.
  # TODO: Implement this
  # 1. Through a secret:
  # secret:
  #   name: vllm-secret
  #   NOTE(review): `vll` looks truncated — confirm the intended key name.
  #   key: vll
  # 2. Directly with an API key (should be avoided in production):
  # apiKey: "xxxxxxxxxxxx"

# Llama Stack distribution to use. For the available distributions see:
# https://llama-stack.readthedocs.io/en/latest/distributions/selection.html
# This value is also interpolated into `image.repository` below.
distribution: distribution-remote-vllm

runConfig:
  # NOTE(review): presumably toggles the use of a custom run.yaml — confirm
  # against the chart templates.
  enabled: false
  # Your custom run.yaml configuration can be pasted under `customYaml`.
  # If it is not set, the default run.yaml file in `files/run.yaml` is used.
  # customYaml:

telemetry:
  enabled: false
  # NOTE(review): despite the name, the default value is a host:port address
  # (it looks like an OpenTelemetry collector endpoint) — confirm how the
  # templates consume it.
  serviceName: "otel-collector.openshift-opentelemetry-operator.svc.cluster.local:4318"
  # Comma-separated list of telemetry sinks.
  sinks: "console,sqlite,otel"

# Use this to pass additional environment variables to the container.
# env:
#   MY_CUSTOM_ENV_VAR: "my-custom-env-var-value"

# Number of replicas to run.
# NOTE(review): presumably ignored when `autoscaling.enabled` is true —
# confirm in the deployment template.
replicaCount: 1

# Container image settings. More information can be found here:
# https://kubernetes.io/docs/concepts/containers/images/
image:
  # The repository embeds a template expression that refers to the top-level
  # `distribution` value. Helm does not render values.yaml itself, so the
  # expression is only expanded if the chart templates pass this value
  # through `tpl` — TODO confirm. It is quoted so that YAML always reads it
  # as a plain string.
  repository: "docker.io/llamastack/{{ $.Values.distribution }}"
  # Quoted so the tag stays a string even when a future version looks
  # numeric (for example 1.10 would otherwise be parsed as the float 1.1).
  tag: "0.1.6"
  # This sets the pull policy for images.
  pullPolicy: Always


# Service account settings. More information can be found here:
# https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created
  create: false
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# Kubernetes annotations to add to the Pod.
# For more information, check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
podAnnotations: {}
# Kubernetes labels to add to the Pod.
# For more information, check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
podLabels: {}

# Pod-level security context; empty by default. The commented line below is
# an example entry: to use it, replace `{}` with the nested key.
podSecurityContext: {}
  # fsGroup: 2000

# Service settings. More information can be found here:
# https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # Service type. More information can be found here:
  # https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: ClusterIP
  # Service port. More information can be found here:
  # https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  port: 5001


# Ingress settings. More information can be found here:
# https://kubernetes.io/docs/concepts/services-networking/ingress/
# NOTE(review): ingress is enabled by default with the placeholder host
# `chart-example.local`, while `route.enabled` defaults to false — confirm
# this default is intended, and override the host per environment.
ingress:
  enabled: true
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # Empty by default; the commented lines below show an example entry.
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local


# -- Enable creation of the OpenShift Route object (use this instead of ingress on OpenShift)
route:
  enabled: false
  # Leave blank to let OpenShift (OCP) determine the host.
  # -- The hostname for the route
  # @default -- Set by OpenShift
  host: ""
  # -- The path for the OpenShift route
  path: ""
  tls:
    # -- Enable secure route settings
    enabled: true
    # -- Secure route termination policy
    termination: edge
    # -- Policy applied to insecure traffic on the route
    insecureEdgeTerminationPolicy: Redirect
  # -- Additional custom annotations for the route
  annotations: {}


# Container resource requests and limits. The defaults set requests equal to
# limits for both CPU and memory.
resources:
  limits:
    cpu: 100m
    memory: 500Mi
  requests:
    cpu: 100m
    memory: 500Mi

# Liveness, readiness and startup probes. More information can be found here:
# https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
# All three probes send an HTTP GET to /v1/health on port 5001, the same
# number as the default `service.port`.
# NOTE(review): the port is hard-coded here; presumably it must be updated
# by hand if the container port changes — confirm against the templates.
livenessProbe:
  httpGet:
    path: /v1/health
    port: 5001
readinessProbe:
  httpGet:
    path: /v1/health
    port: 5001
startupProbe:
  httpGet:
    path: /v1/health
    port: 5001
  # Under standard Kubernetes probe semantics these settings give the
  # container roughly 40s + 30 * 10s = 340s to start before it is restarted.
  initialDelaySeconds: 40
  periodSeconds: 10
  failureThreshold: 30

# Autoscaling settings. More information can be found here:
# https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # Optional memory-based target; uncomment to set it.
  # targetMemoryUtilizationPercentage: 80