mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 01:01:13 +00:00 
			
		
		
		
This Kubernetes cluster has:

- vLLM for serving an inference model
- vLLM for serving a safety model
- Postgres DB (for metadata and other state for the Llama Stack distro)
- Chroma DB for Vector IO (memory)

Perhaps most importantly, this was me trying to learn Kubernetes for the first time.

## Test Plan

Run `sh apply.sh` against an EKS cluster, then run `kubectl port-forward service/llama-stack-service 8321:8321`. After many attempts, we finally have:

<img width="1589" alt="image" src="https://github.com/user-attachments/assets/c69f242d-6aaa-4def-9f7c-172113b8bfc1" />
<img width="1978" alt="image" src="https://github.com/user-attachments/assets/cf678404-f551-4fa5-9077-bebe3e8e8ae8" />
		
			
				
	
	
		
			66 lines
		
	
	
	
		
			1.3 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			66 lines
		
	
	
	
		
			1.3 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
---
# Storage claim for ChromaDB. The chromadb Deployment in this file mounts this
# claim at /chroma/chroma.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: chromadb-pvc
spec:
  # Capacity requested from the cluster's storage provisioner.
  resources:
    requests:
      storage: 20Gi
  # ReadWriteOnce: the volume is mounted read-write by a single node at a time.
  accessModes:
    - ReadWriteOnce
---
# ChromaDB vector store: a single-replica Deployment whose data directory
# (/chroma/chroma) is backed by the chromadb-pvc PersistentVolumeClaim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: chromadb
spec:
  replicas: 1
  # The only replica mounts a ReadWriteOnce volume (chromadb-pvc). With the
  # default RollingUpdate strategy the replacement pod is created before the
  # old one is stopped, and it can hang waiting for the volume if it lands on
  # a different node. Recreate terminates the old pod first.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: chromadb
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above and the Service selector.
        app: chromadb
    spec:
      containers:
        - name: chromadb
          # NOTE(review): `latest` is a floating tag; consider pinning a
          # specific version so redeploys are reproducible.
          image: chromadb/chroma:latest
          ports:
            - containerPort: 6000
          # NOTE(review): confirm the image version in use honours these
          # variables; their names and meaning are defined by the image, not
          # by this manifest.
          env:
            - name: CHROMA_HOST
              value: "0.0.0.0"
            # Quoted because env values must be strings; same number as
            # containerPort above.
            - name: CHROMA_PORT
              value: "6000"
            # Same path as the volumeMount below.
            - name: PERSIST_DIRECTORY
              value: "/chroma/chroma"
            - name: CHROMA_DB_IMPL
              value: "duckdb+parquet"
          resources:
            requests:
              memory: "512Mi"
              cpu: "250m"
            limits:
              memory: "2Gi"
              cpu: "1000m"
          volumeMounts:
            - name: chromadb-storage
              mountPath: /chroma/chroma
      volumes:
        - name: chromadb-storage
          persistentVolumeClaim:
            claimName: chromadb-pvc
---
# Cluster-internal Service that routes TCP port 6000 to port 6000 on pods
# labelled app=chromadb.
apiVersion: v1
kind: Service
metadata:
  name: chromadb
spec:
  type: ClusterIP
  selector:
    app: chromadb
  ports:
    - port: 6000
      targetPort: 6000
      protocol: TCP