add NIM k8s solution

This commit is contained in:
Kai Wu 2025-07-29 09:01:21 -07:00
parent 95d25ddfe2
commit 8c0f328cbc
4 changed files with 133 additions and 128 deletions

View file

@ -0,0 +1,73 @@
# -------------------------------------------------
# NVIDIA NIM — Llama-3.1 Nemotron-Nano-4B-v1.1
# -------------------------------------------------
# Deployment: runs a single replica of the NIM container with one GPU and
# serves HTTP on container port 8000 (named "http-openai").
#
# Secrets expected in the same namespace:
#   - ngc-secret : docker-registry secret used to pull the image from nvcr.io
#   - ngc-api    : generic secret holding the key NGC_API_KEY
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-nano-nim
  labels:
    app: llama-nano-nim
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llama-nano-nim
  template:
    metadata:
      labels:
        app: llama-nano-nim
    spec:
      imagePullSecrets:
        # docker-registry secret: nvcr.io / $oauthtoken / <NGC_DOCKER_API_KEY>
        - name: ngc-secret
      volumes:
        - name: model-cache
          emptyDir:
            medium: Memory  # tmpfs; omit or use "" to back by node disk
            sizeLimit: 12Gi  # fits the 4 B model + tensors; adjust if needed
      containers:
        - name: nim
          # NOTE(review): confirm this repository path and tag exist on nvcr.io.
          image: nvcr.io/nim/meta/llama-3_1-nemotron-nano-4b-v1_1:1.0.0
          ports:
            - name: http-openai
              containerPort: 8000
          resources:
            limits:
              nvidia.com/gpu: 1
          env:
            - name: NIM_MODEL_NAME
              value: "nvidia/llama-3_1-nemotron-nano-4b-v1_1"
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api
                  key: NGC_API_KEY
            # NIM caches model artifacts under /opt/nim/.cache unless told
            # otherwise; point it at the volume mounted below so the
            # model-cache emptyDir is actually used.
            - name: NIM_CACHE_PATH
              value: "/models"
          volumeMounts:
            - name: model-cache
              mountPath: /models  # must match NIM_CACHE_PATH above
          # Hold off liveness/readiness checks until the server first reports
          # ready, so a slow model download or engine load does not get the
          # container killed. Allows up to 180 * 10 s = 30 min for start-up.
          startupProbe:
            httpGet:
              path: /v1/health/ready
              port: http-openai
            periodSeconds: 10
            failureThreshold: 180
          readinessProbe:
            httpGet:
              path: /v1/models
              port: http-openai
            initialDelaySeconds: 20
            periodSeconds: 10
          livenessProbe:
            httpGet:
              # NIM exposes liveness at /v1/health/live (not bare /v1/health).
              path: /v1/health/live
              port: http-openai
            initialDelaySeconds: 60
            periodSeconds: 30
---
# Service: stable in-cluster endpoint for the pods labelled
# app=llama-nano-nim; forwards service port 8000 to pod port 8000.
apiVersion: v1
kind: Service
metadata:
  name: llama-nano-nim
spec:
  type: ClusterIP  # cluster-internal virtual IP only
  selector:
    app: llama-nano-nim
  ports:
    - name: http-openai
      port: 8000
      targetPort: 8000