From 8a61e028da55479572061942231f484c191fc0b2 Mon Sep 17 00:00:00 2001
From: Jamie Land <hokie10@gmail.com>
Date: Tue, 18 Mar 2025 12:13:02 -0400
Subject: [PATCH] Readme Updates

---
 chart/README.md                 | 127 ++++++++++++++++++--------------
 chart/templates/config.yaml     |   2 +
 chart/templates/deployment.yaml |  12 +++
 chart/values.yaml               |  17 ++++-
 4 files changed, 102 insertions(+), 56 deletions(-)

diff --git a/chart/README.md b/chart/README.md
index eb319924e..cbd704078 100644
--- a/chart/README.md
+++ b/chart/README.md
@@ -12,9 +12,12 @@ Optionally, the chart also supports the installation of the llama-stack-playgrou
 Create a `local-values.yaml` file with the following:
 
 > **Note**
-> Chart currently only supports `vllm` framework directly. But other distributions can be used by modifying the `env` directly.
+> Chart currently only supports the `vllm` framework directly, but other distributions can be managed by adding to the `env` inside the values file directly.
 
 ```yaml
+
+distribution: distribution-remote-vllm
+
 vllm:
   url: "https://<MY_VLLM_INSTANCE>:443/v1"
   inferenceModel: "meta-llama/Llama-3.1-8B-Instruct"
@@ -27,59 +30,73 @@ Login to Kubernetes through the CLI and run:
 helm upgrade -i ollama-stack . -f local-values.yaml
 ```
 
+## Custom Configuration
+
+By default, llama-stack will use the `run.yaml` config that comes with the specified distribution. For more granular control, set `customRunConfig` to `true`, in which case the Helm chart will use the contents of `files/run.yaml` instead.
+
 ## Values
 
-| Key | Type | Default | Description |
-|-----|------|---------|-------------|
-| autoscaling.enabled | bool | `false` |  |
-| autoscaling.maxReplicas | int | `100` |  |
-| autoscaling.minReplicas | int | `1` |  |
-| autoscaling.targetCPUUtilizationPercentage | int | `80` |  |
-| distribution | string | `"distribution-remote-vllm"` |  |
-| image.pullPolicy | string | `"Always"` |  |
-| image.repository | string | `"docker.io/llamastack/{{ $.Values.distribution }}"` |  |
-| image.tag | string | `"0.1.6"` |  |
-| ingress.annotations | object | `{}` |  |
-| ingress.className | string | `""` |  |
-| ingress.enabled | bool | `true` |  |
-| ingress.hosts[0].host | string | `"chart-example.local"` |  |
-| ingress.hosts[0].paths[0].path | string | `"/"` |  |
-| ingress.hosts[0].paths[0].pathType | string | `"ImplementationSpecific"` |  |
-| ingress.tls | list | `[]` |  |
-| livenessProbe.httpGet.path | string | `"/v1/health"` |  |
-| livenessProbe.httpGet.port | int | `5001` |  |
-| podAnnotations | object | `{}` |  |
-| podLabels | object | `{}` |  |
-| podSecurityContext | object | `{}` |  |
-| readinessProbe.httpGet.path | string | `"/v1/health"` |  |
-| readinessProbe.httpGet.port | int | `5001` |  |
-| replicaCount | int | `1` |  |
-| resources.limits.cpu | string | `"100m"` |  |
-| resources.limits.memory | string | `"500Mi"` |  |
-| resources.requests.cpu | string | `"100m"` |  |
-| resources.requests.memory | string | `"500Mi"` |  |
-| route | object | `{"annotations":{},"enabled":false,"host":"","path":"","tls":{"enabled":true,"insecureEdgeTerminationPolicy":"Redirect","termination":"edge"}}` | Enable creation of the OpenShift Route object (This should be used instead of ingress on OpenShift) |
-| route.annotations | object | `{}` | Additional custom annotations for the route |
-| route.host | string | Set by OpenShift | The hostname for the route |
-| route.path | string | `""` | The path for the OpenShift route |
-| route.tls.enabled | bool | `true` | Enable secure route settings |
-| route.tls.insecureEdgeTerminationPolicy | string | `"Redirect"` | Insecure route termination policy |
-| route.tls.termination | string | `"edge"` | Secure route termination policy |
-| runConfig.enabled | bool | `false` |  |
-| service.port | int | `5001` |  |
-| service.type | string | `"ClusterIP"` |  |
-| serviceAccount.annotations | object | `{}` |  |
-| serviceAccount.automount | bool | `true` |  |
-| serviceAccount.create | bool | `false` |  |
-| serviceAccount.name | string | `""` |  |
-| startupProbe.failureThreshold | int | `30` |  |
-| startupProbe.httpGet.path | string | `"/v1/health"` |  |
-| startupProbe.httpGet.port | int | `5001` |  |
-| startupProbe.initialDelaySeconds | int | `40` |  |
-| startupProbe.periodSeconds | int | `10` |  |
-| telemetry.enabled | bool | `false` |  |
-| telemetry.serviceName | string | `"otel-collector.openshift-opentelemetry-operator.svc.cluster.local:4318"` |  |
-| telemetry.sinks | string | `"console,sqlite,otel"` |  |
-| vllm.inferenceModel | string | `"llama2-7b-chat"` |  |
-| vllm.url | string | `"http://vllm-server"` |  |
-| yamlConfig | string | `"/config/run.yaml"` |  |
+### Llama Stack Specific
+
+| Key                     | Type     | Default                                                                    | Description                                                                                                                           |
+| :---------------------- | :------- | :------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------ |
+| `customRunConfig`       | `bool`   | `false`                                                                    | Indicates whether a custom run configuration is being used.                                                                           |
+| `distribution`          | `string` | `"distribution-remote-vllm"`                                               | Specifies the distribution or type of deployment being used (in this case, related to a remote vLLM distribution).                    |
+| `telemetry.enabled`     | `bool`   | `false`                                                                    | Enables or disables telemetry collection.                                                                                             |
+| `telemetry.serviceName` | `string` | `"otel-collector.openshift-opentelemetry-operator.svc.cluster.local:4318"` | The service name and address of the telemetry collector.                                                                              |
+| `telemetry.sinks`       | `string` | `"console,sqlite,otel"`                                                    | Specifies the destinations or sinks where telemetry data will be sent.                                                                |
+| `vllm.inferenceModel`   | `string` | `"llama2-7b-chat"`                                                         | The specific inference model to be used by vLLM (a high-throughput and memory-efficient inference service for large language models). |
+| `vllm.url`              | `string` | `"http://vllm-server"`                                                     | The URL of the vLLM service.                                                                                                          |
+| `env`                   | `object` | N/A                                                                        | A set of key/value pairs that can be set in the pod                                                                                   |
+
+### General
+
+| Key                                        | Type   | Default                                                                                                                            | Description                                                                                                                                                                                                                                                           |
+| :----------------------------------------- | :----- | :----------------------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `autoscaling.enabled`                      | `bool` | `false`                                                                                                                              | Enables or disables horizontal pod autoscaling, which automatically adjusts the number of running instances based on CPU utilization.                                                                                                                             |
+| `autoscaling.maxReplicas`                  | `int`  | `100`                                                                                                                                | The maximum number of pod replicas that the autoscaler can scale up to.                                                                                                                                                                                               |
+| `autoscaling.minReplicas`                  | `int`  | `1`                                                                                                                                  | The minimum number of pod replicas that will always be running.                                                                                                                                                                                                      |
+| `autoscaling.targetCPUUtilizationPercentage` | `int`  | `80`                                                                                                                                 | The target average CPU utilization across all running pods that the autoscaler will aim to maintain.                                                                                                                                                                    |
+| `image.pullPolicy`                         | `string` | `"Always"`                                                                                                                           | Defines when to pull the Docker image for the container (e.g., always pull, pull if not present, etc.).                                                                                                                                                           |
+| `image.repository`                         | `string` | `"docker.io/llamastack/{{ $.Values.distribution }}"`                                                                                 | The Docker image repository where the container image is located. It likely uses the `distribution` value to construct the full image path.                                                                                                                            |
+| `image.tag`                                | `string` | `"0.1.6"`                                                                                                                            | The specific version tag of the Docker image to use.                                                                                                                                                                                                                 |
+| `ingress.annotations`                      | `object` | `{}`                                                                                                                                 | Kubernetes Ingress annotations, which can be used to configure load balancers and other external access settings.                                                                                                                                                    |
+| `ingress.className`                        | `string` | `""`                                                                                                                                 | The name of the Ingress controller to use for this Ingress resource.                                                                                                                                                                                                 |
+| `ingress.enabled`                          | `bool` | `true`                                                                                                                               | Enables or disables the creation of a Kubernetes Ingress resource, which allows external access to the application.                                                                                                                                                  |
+| `ingress.hosts[0].host`                    | `string` | `"chart-example.local"`                                                                                                            | The hostname that the Ingress will route traffic to. This is often a placeholder or example.                                                                                                                                                                             |
+| `ingress.hosts[0].paths[0].path`          | `string` | `"/"`                                                                                                                                  | The path on the specified host that the Ingress will route traffic to (in this case, the root path).                                                                                                                                                                     |
+| `ingress.hosts[0].paths[0].pathType`      | `string` | `"ImplementationSpecific"`                                                                                                         | The type of path matching used by the Ingress controller.                                                                                                                                                                                                            |
+| `ingress.tls`                              | `list`   | `[]`                                                                                                                                 | Configuration for Transport Layer Security (TLS) termination at the Ingress, allowing for HTTPS.                                                                                                                                                                     |
+| `livenessProbe.httpGet.path`               | `string` | `"/v1/health"`                                                                                                                      | The HTTP endpoint path that the liveness probe will check to determine if the container is running and healthy.                                                                                                                                                           |
+| `livenessProbe.httpGet.port`               | `int`  | `5001`                                                                                                                               | The port that the liveness probe will connect to for the HTTP health check.                                                                                                                                                                                           |
+| `podAnnotations`                           | `object` | `{}`                                                                                                                                 | Kubernetes Pod annotations, which can be used to attach arbitrary non-identifying metadata to the Pod.                                                                                                                                                                 |
+| `podLabels`                                | `object` | `{}`                                                                                                                                 | Kubernetes Pod labels, which are key/value pairs that are attached to Pods and can be used for organizing and selecting groups of Pods.                                                                                                                                    |
+| `podSecurityContext`                       | `object` | `{}`                                                                                                                                 | Defines the security context for the Pod, such as user and group IDs, security capabilities, etc.                                                                                                                                                                      |
+| `readinessProbe.httpGet.path`              | `string` | `"/v1/health"`                                                                                                                      | The HTTP endpoint path that the readiness probe will check to determine if the container is ready to serve traffic.                                                                                                                                                           |
+| `readinessProbe.httpGet.port`              | `int`  | `5001`                                                                                                                               | The port that the readiness probe will connect to for the HTTP readiness check.                                                                                                                                                                                          |
+| `replicaCount`                             | `int`  | `1`                                                                                                                                  | The desired number of pod replicas to run.                                                                                                                                                                                                                         |
+| `resources.limits.cpu`                     | `string` | `"100m"`                                                                                                                             | The maximum amount of CPU resources that a container can use (in millicores).                                                                                                                                                                                            |
+| `resources.limits.memory`                  | `string` | `"500Mi"`                                                                                                                             | The maximum amount of memory that a container can use (in megabytes).                                                                                                                                                                                                  |
+| `resources.requests.cpu`                   | `string` | `"100m"`                                                                                                                             | The amount of CPU resources that Kubernetes will guarantee to be available for the container.                                                                                                                                                                              |
+| `resources.requests.memory`                | `string` | `"500Mi"`                                                                                                                             | The amount of memory that Kubernetes will guarantee to be available for the container (in mebibytes).                                                                                                                                                                     |
+| `route`                                    | `object` | `{"annotations":{},"enabled":false,"host":"","path":"","tls":{"enabled":true,"insecureEdgeTerminationPolicy":"Redirect","termination":"edge"}}` | Configuration for an OpenShift Route object, which is used for exposing services externally on OpenShift.                                                                                                                                                           |
+| `route.annotations`                        | `object` | `{}`                                                                                                                                 | Additional custom annotations for the OpenShift Route object.                                                                                                                                                                                                        |
+| `route.host`                               | `string` | `Set by OpenShift`                                                                                                                   | The hostname for the OpenShift Route. This is typically managed by OpenShift.                                                                                                                                                                                           |
+| `route.path`                               | `string` | `""`                                                                                                                                 | The path for the OpenShift Route.                                                                                                                                                                                                                                    |
+| `route.tls.enabled`                        | `bool` | `true`                                                                                                                               | Enables or disables TLS for the OpenShift Route, providing secure communication.                                                                                                                                                                                          |
+| `route.tls.insecureEdgeTerminationPolicy`    | `string` | `"Redirect"`                                                                                                                         | The policy for handling insecure (HTTP) requests when TLS termination is at the edge (Route).                                                                                                                                                                         |
+| `route.tls.termination`                    | `string` | `"edge"`                                                                                                                             | Specifies that TLS termination occurs at the OpenShift Route edge.                                                                                                                                                                                                      |
+| `runConfig.enabled`                        | `bool` | `false`                                                                                                                              | Indicates whether a specific run configuration is enabled.                                                                                                                                                                                                           |
+| `service.port`                             | `int`  | `5001`                                                                                                                               | The port on which the Kubernetes Service will be exposed internally within the cluster.                                                                                                                                                                                  |
+| `service.type`                             | `string` | `"ClusterIP"`                                                                                                                        | The type of Kubernetes Service. `ClusterIP` makes the service only reachable from within the cluster.                                                                                                                                                                 |
+| `serviceAccount.annotations`               | `object` | `{}`                                                                                                                                 | Annotations for the Kubernetes ServiceAccount.                                                                                                                                                                                                                       |
+| `serviceAccount.automount`                 | `bool` | `true`                                                                                                                               | Indicates whether the ServiceAccount token should be automatically mounted into the Pods.                                                                                                                                                                            |
+| `serviceAccount.create`                    | `bool` | `false`                                                                                                                              | Determines whether a new Kubernetes ServiceAccount should be created.                                                                                                                                                                                                 |
+| `serviceAccount.name`                      | `string` | `""`                                                                                                                                 | The name of an existing Kubernetes ServiceAccount to use. If `create` is true and this is empty, a default name will be generated.                                                                                                                                     |
+| `startupProbe.failureThreshold`            | `int`  | `30`                                                                                                                                 | The number of consecutive failures of the startup probe before Kubernetes considers the container failed to start.                                                                                                                                                  |
+| `startupProbe.httpGet.path`                | `string` | `"/v1/health"`                                                                                                                      | The HTTP endpoint path for the startup probe, used to determine if the application has started successfully.                                                                                                                                                           |
+| `startupProbe.httpGet.port`                | `int`  | `5001`                                                                                                                               | The port for the HTTP startup probe.                                                                                                                                                                                                                                 |
+| `startupProbe.initialDelaySeconds`         | `int`  | `40`                                                                                                                                 | The number of seconds to wait after the container has started before the startup probe is first initiated.                                                                                                                                                            |
+| `startupProbe.periodSeconds`               | `int`  | `10`                                                                                                                                 | The interval (in seconds) at which the startup probe will be executed.                                                                                                                                                                                               |
+| `volumeMounts`                             | `list`   | `[]`                                                                                                                                 | A list of volume mounts that define how volumes should be mounted into the container's filesystem.                                                                                                                                                                   |
+| `volumes`                                  | `list`   | `[]`                                                                                                                                 | A list of volume definitions that provide storage for the Pod.                                                                                                                                                                                                          |
diff --git a/chart/templates/config.yaml b/chart/templates/config.yaml
index 62e18272c..f7f7c6373 100644
--- a/chart/templates/config.yaml
+++ b/chart/templates/config.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.customRunConfig }}
 apiVersion: v1
 kind: ConfigMap
 metadata:
@@ -5,3 +6,4 @@ metadata:
 data:
   run.yaml: |-
     {{- .Files.Get "files/run.yaml" | nindent 4 }}
+{{- end }}
\ No newline at end of file
diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml
index 7f2a0ea2f..59488ffc5 100644
--- a/chart/templates/deployment.yaml
+++ b/chart/templates/deployment.yaml
@@ -38,9 +38,11 @@ spec:
             - name: http
               containerPort: {{ .Values.service.port }}
               protocol: TCP
+          {{- if .Values.customRunConfig }}
           args:
             - "--yaml-config"
             - "/config/run.yaml"
+          {{- end }}
           env:
           {{- with .Values.vllm }}
             - name: VLLM_URL
@@ -69,14 +71,30 @@ spec:
             {{- tpl (toYaml .Values.startupProbe) $ | nindent 12 }}
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
+          {{- if or .Values.customRunConfig .Values.volumeMounts }}
           volumeMounts:
+            {{- /* The run-config ConfigMap is only rendered when customRunConfig is true, so only mount it then. */}}
+            {{- if .Values.customRunConfig }}
             - name: config-volume
               mountPath: /config
+            {{- end }}
+            {{- with .Values.volumeMounts }}
+              {{- toYaml . | nindent 12 }}
+            {{- end }}
+          {{- end }}
+      {{- if or .Values.customRunConfig .Values.volumes }}
       volumes:
+        {{- /* Keep in sync with the config-volume mount above: both are gated on customRunConfig. */}}
+        {{- if .Values.customRunConfig }}
         - name: config-volume
           configMap:
             name: {{ include "llama-stack.fullname" . }}-run-config
             defaultMode: 0755
+        {{- end }}
+        {{- with .Values.volumes }}
+          {{- toYaml . | nindent 8 }}
+        {{- end }}
+      {{- end }}
       {{- with .Values.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
diff --git a/chart/values.yaml b/chart/values.yaml
index 65e0ce61c..cbd359c60 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -1,4 +1,6 @@
-# yamlConfig: "/config/run.yaml"
+
+# When set to true, use the custom run configuration from the chart's `files/run.yaml` file instead of the distribution's default
+customRunConfig: false
 
 # TODO: Currently we are only working for vLLM this should be expanded in the future
 vllm:
@@ -69,6 +71,19 @@ service:
   # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports
   port: 5001
 
+# Additional volumes on the output Deployment definition.
+volumes: []
+# - name: foo
+#   secret:
+#     secretName: mysecret
+#     optional: false
+
+# Additional volumeMounts on the output Deployment definition.
+volumeMounts: []
+# - name: foo
+#   mountPath: "/etc/foo"
+#   readOnly: true
+
 
 # This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/
 ingress: