llama-stack-mirror/docs/source/distributions/k8s-benchmark/locust-k8s.yaml
ehhuang d6ae54723d
chore: setup for performance benchmarking (#3096)
# What does this PR do?
1. Add a simple mock OpenAI-compatible server that serves chat/completion
2. Add a benchmark server in EKS that includes the mock inference server
3. Add a locust (https://locust.io/) file for load testing

## Test Plan
bash apply.sh
kubectl port-forward service/locust-web-ui 8089:8089
Go to localhost:8089 to start a load test

<img width="1392" height="334" alt="image"
src="https://github.com/user-attachments/assets/d6aa3deb-583a-42ed-889b-751262b8e91c"
/>
<img width="1362" height="881" alt="image"
src="https://github.com/user-attachments/assets/6a28b9b4-05e6-44e2-b504-07e60c12d35e"
/>
2025-08-13 10:58:22 -07:00

131 lines
2.9 KiB
YAML

# Locust master: a single-replica Deployment that runs `locust --master`,
# serving the web UI on 8089 and accepting worker connections on 5557.
# The test script is mounted from the `locust-script` ConfigMap at /locust.
# NOTE(review): the ${...} values are placeholders, presumably substituted
# (e.g. via envsubst) before this manifest is applied — confirm.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: locust-master
  labels:
    app: locust
    role: master
spec:
  replicas: 1
  selector:
    matchLabels:
      app: locust
      role: master
  template:
    metadata:
      # Must match spec.selector.matchLabels above and the Service selectors.
      labels:
        app: locust
        role: master
    spec:
      containers:
        - name: locust-master
          image: locustio/locust:2.31.8
          ports:
            - containerPort: 8089  # Web UI
            - containerPort: 5557  # Master communication
          env:
            - name: LOCUST_HOST
              value: "${LOCUST_HOST}"
            - name: LOCUST_LOCUSTFILE
              value: "/locust/locustfile.py"
            - name: LOCUST_WEB_HOST
              value: "0.0.0.0"
            # NOTE(review): Locust's documented variable for master mode is
            # LOCUST_MODE_MASTER; the `--master` arg below is what selects
            # master mode. Confirm whether anything reads LOCUST_MASTER.
            - name: LOCUST_MASTER
              value: "true"
            # Presumably consumed by locustfile.py rather than by Locust
            # itself — verify against the script in the ConfigMap.
            - name: LOCUST_BASE_PATH
              value: "${LOCUST_BASE_PATH}"
            - name: INFERENCE_MODEL
              value: "${BENCHMARK_INFERENCE_MODEL}"
          volumeMounts:
            - name: locust-script
              mountPath: /locust
          command: ["locust"]
          # These flags repeat several of the env settings above explicitly.
          args:
            - "--master"
            - "--web-host=0.0.0.0"
            - "--web-port=8089"
            - "--host=${LOCUST_HOST}"
            - "--locustfile=/locust/locustfile.py"
      volumes:
        - name: locust-script
          configMap:
            name: locust-script
---
# Locust workers: a Deployment that runs `locust --worker` and connects to
# the master through the `locust-master-service` Service on port 5557.
# The test script is mounted from the `locust-script` ConfigMap at /locust.
# NOTE(review): the ${...} values are placeholders, presumably substituted
# (e.g. via envsubst) before this manifest is applied — confirm.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: locust-worker
  labels:
    app: locust
    role: worker
spec:
  replicas: 2  # Start with 2 workers, can be scaled up
  selector:
    matchLabels:
      app: locust
      role: worker
  template:
    metadata:
      # Must match spec.selector.matchLabels above.
      labels:
        app: locust
        role: worker
    spec:
      containers:
        - name: locust-worker
          image: locustio/locust:2.31.8
          env:
            - name: LOCUST_HOST
              value: "${LOCUST_HOST}"
            - name: LOCUST_LOCUSTFILE
              value: "/locust/locustfile.py"
            # NOTE(review): Locust's documented variables are
            # LOCUST_MASTER_NODE_HOST / LOCUST_MASTER_NODE_PORT; the
            # `--master-host` / `--master-port` args below are what point
            # the worker at the master. Confirm whether anything reads
            # these two variables.
            - name: LOCUST_MASTER_HOST
              value: "locust-master-service"
            - name: LOCUST_MASTER_PORT
              value: "5557"
            # Presumably consumed by locustfile.py rather than by Locust
            # itself — verify against the script in the ConfigMap.
            - name: INFERENCE_MODEL
              value: "${BENCHMARK_INFERENCE_MODEL}"
            - name: LOCUST_BASE_PATH
              value: "${LOCUST_BASE_PATH}"
          volumeMounts:
            - name: locust-script
              mountPath: /locust
          command: ["locust"]
          args:
            - "--worker"
            - "--master-host=locust-master-service"
            - "--master-port=5557"
            - "--locustfile=/locust/locustfile.py"
      volumes:
        - name: locust-script
          configMap:
            name: locust-script
---
# In-cluster Service in front of the Locust master pod (app=locust,
# role=master). Workers connect to it by name via
# `--master-host=locust-master-service` on the master-comm port.
apiVersion: v1
kind: Service
metadata:
  name: locust-master-service
spec:
  selector:
    app: locust
    role: master
  ports:
    - name: web-ui
      port: 8089
      targetPort: 8089
    - name: master-comm
      port: 5557
      targetPort: 5557
  type: ClusterIP
---
# Service exposing only the Locust web UI (port 8089) of the master pod
# (app=locust, role=master).
apiVersion: v1
kind: Service
metadata:
  name: locust-web-ui
spec:
  selector:
    app: locust
    role: master
  ports:
    - port: 8089
      targetPort: 8089
  type: ClusterIP  # Keep internal, use port-forward to access