mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-31 02:53:52 +00:00
142 lines
4.5 KiB
YAML
142 lines
4.5 KiB
YAML
# yamlConfig: "/config/run.yaml"
|
|
|
|
# Connection settings for the vLLM inference backend.
# TODO: Only vLLM is supported at the moment; this should be expanded in the future.
vllm:
  url: "http://vllm-server"
  inferenceModel: "llama2-7b-chat"
  # API key for the vLLM server. It can be supplied in one of two ways.
  #
  # 1. Through a secret (TODO: not implemented yet):
  # secret:
  #   name: vllm-secret
  #   key: vll
  #
  # 2. Directly as a plain value (should be avoided in production):
  # apiKey: "xxxxxxxxxxxx"
|
|
|
|
# Distribution to use; this value is also interpolated into image.repository.
# Available distributions are described at:
# https://llama-stack.readthedocs.io/en/latest/distributions/selection.html
distribution: "distribution-remote-vllm"
|
|
|
|
# Optional custom run.yaml for the server.
runConfig:
  # NOTE(review): presumably toggles use of the custom configuration below;
  # confirm against the chart templates.
  enabled: false
  # Paste your custom run.yaml content under customYaml. If customYaml is not
  # set, the default run.yaml file in `files/run.yaml` is used.
  # customYaml:
|
|
|
|
# Telemetry settings (disabled by default).
telemetry:
  enabled: false
  # NOTE(review): despite the key name, this value is a host:port address of
  # an OpenTelemetry collector; confirm how the chart templates consume it.
  serviceName: "otel-collector.openshift-opentelemetry-operator.svc.cluster.local:4318"
  # Comma-separated list of telemetry sinks.
  sinks: "console,sqlite,otel"
|
|
|
|
# Use this to pass additional environment variables to the container
|
|
# env:
|
|
# MY_CUSTOM_ENV_VAR: "my-custom-env-var-value"
|
|
|
|
# Number of replicas to run.
# NOTE(review): presumably only applies while autoscaling.enabled is false; confirm against the templates.
replicaCount: 1
|
|
|
|
# Container image settings. More information can be found here:
# https://kubernetes.io/docs/concepts/containers/images/
image:
  # The value embeds a Helm template expression, so it is quoted to keep it an
  # unambiguous string for every YAML parser.
  # NOTE(review): presumably rendered with `tpl` by the chart templates; confirm.
  repository: "docker.io/llamastack/{{ $.Values.distribution }}"
  # Quoted so the version is always read as a string, never as a number.
  tag: "0.1.6"
  # This sets the pull policy for images.
  pullPolicy: Always
|
|
|
|
|
|
# Service account settings. More information can be found here:
# https://kubernetes.io/docs/concepts/security/service-accounts/
serviceAccount:
  # Specifies whether a service account should be created.
  create: false
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account.
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template.
  name: ""
|
|
|
|
# Kubernetes annotations to set on the Pod (none by default).
# For more information, see:
# https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/
podAnnotations: {}
|
|
# Kubernetes labels to set on the Pod (none by default).
# For more information, see:
# https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
podLabels: {}
|
|
|
|
# Security context for the Pod; empty by default. Example setting:
podSecurityContext: {}
  # fsGroup: 2000
|
|
|
|
# Service settings. More information can be found here:
# https://kubernetes.io/docs/concepts/services-networking/service/
service:
  # Service type. See:
  # https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
  type: ClusterIP
  # Service port. See:
  # https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
  port: 5001
|
|
|
|
|
|
# Ingress settings. More information can be found here:
# https://kubernetes.io/docs/concepts/services-networking/ingress/
ingress:
  # NOTE(review): enabled by default while the host below is still the
  # placeholder `chart-example.local`; confirm this default is intended.
  enabled: true
  className: ""
  annotations: {}
    # kubernetes.io/ingress.class: nginx
    # kubernetes.io/tls-acme: "true"
  hosts:
    - host: chart-example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  #  - secretName: chart-example-tls
  #    hosts:
  #      - chart-example.local
|
|
|
|
|
|
# OpenShift Route settings (use this instead of ingress on OpenShift).
route:
  # -- Enable creation of the OpenShift Route object
  enabled: false
  # -- The hostname for the route; leave blank to let OpenShift determine the host
  # @default -- Set by OpenShift
  host: ""
  # -- The path for the OpenShift route
  path: ""
  tls:
    # -- Enable secure route settings
    enabled: true
    # -- Secure route termination policy
    termination: edge
    # -- Insecure route termination policy
    insecureEdgeTerminationPolicy: Redirect
  # -- Additional custom annotations for the route
  annotations: {}
|
|
|
|
|
|
# Container resource limits and requests. Both are set to the same values.
resources:
  limits:
    cpu: 100m
    memory: 500Mi
  requests:
    cpu: 100m
    memory: 500Mi
|
|
|
|
# Liveness, readiness and startup probes. More information can be found here:
# https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
livenessProbe:
  httpGet:
    path: /v1/health
    # NOTE(review): same value as service.port (5001); presumably the two must
    # be kept in sync. Confirm against the chart templates.
    port: 5001
|
|
# Readiness probe: HTTP GET against the health endpoint.
readinessProbe:
  httpGet:
    path: /v1/health
    # NOTE(review): same value as service.port (5001); presumably the two must
    # be kept in sync. Confirm against the chart templates.
    port: 5001
|
|
# Startup probe: HTTP GET against the health endpoint.
startupProbe:
  httpGet:
    path: /v1/health
    # NOTE(review): same value as service.port (5001); presumably the two must
    # be kept in sync. Confirm against the chart templates.
    port: 5001
  # Wait 40 s before the first check, then check every 10 s and tolerate up to
  # 30 consecutive failures.
  initialDelaySeconds: 40
  periodSeconds: 10
  failureThreshold: 30
|
|
|
|
# Autoscaling settings (disabled by default). More information can be found here:
# https://kubernetes.io/docs/concepts/workloads/autoscaling/
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80
|