mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-08-02 08:44:44 +00:00
add NIM k8s solution
This commit is contained in:
parent
95d25ddfe2
commit
8c0f328cbc
4 changed files with 133 additions and 128 deletions
73
docs/source/distributions/k8s/llama-nim.yaml.template
Normal file
73
docs/source/distributions/k8s/llama-nim.yaml.template
Normal file
|
@ -0,0 +1,73 @@
|
|||
# -------------------------------------------------
|
||||
# NVIDIA NIM — Llama-3.1 Nemotron-Nano-4B-v1.1
|
||||
# -------------------------------------------------
|
||||
|
||||
# Deployment running a single NVIDIA NIM inference container that serves an
# OpenAI-compatible HTTP API on port 8000 and requests one NVIDIA GPU.
#
# Prerequisites referenced below (must exist in the same namespace):
#   - Secret "ngc-secret": docker-registry secret used to pull from nvcr.io
#   - Secret "ngc-api":    holds key NGC_API_KEY, injected into the container
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llama-nano-nim
  labels:
    app: llama-nano-nim
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llama-nano-nim
  template:
    metadata:
      labels:
        app: llama-nano-nim
    spec:
      imagePullSecrets:
        - name: ngc-secret  # docker-registry secret: nvcr.io / $oauthtoken / <NGC_DOCKER_API_KEY>
      volumes:
        - name: model-cache
          emptyDir:
            medium: Memory  # tmpfs; omit or use "" to back by node disk
            sizeLimit: 12Gi  # fits the 4 B model + tensors; adjust if needed
      containers:
        - name: nim
          image: nvcr.io/nim/meta/llama-3_1-nemotron-nano-4b-v1_1:1.0.0
          ports:
            - name: http-openai
              containerPort: 8000
          resources:
            limits:
              nvidia.com/gpu: 1
          env:
            - name: NIM_MODEL_NAME
              value: "nvidia/llama-3_1-nemotron-nano-4b-v1_1"
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api
                  key: NGC_API_KEY
          volumeMounts:
            - name: model-cache
              # NOTE(review): NIM documents /opt/nim/.cache (NIM_CACHE_PATH) as
              # its default cache directory — confirm the container actually
              # writes to /models, otherwise this volume is unused.
              mountPath: /models
          # Model download + load can take many minutes. The startup probe
          # suspends the readiness/liveness probes until the server first
          # reports ready (up to 180 * 10s = 30 min), so the kubelet does not
          # kill the container mid-load and put it into a restart loop.
          startupProbe:
            httpGet:
              path: /v1/health/ready
              port: http-openai
            periodSeconds: 10
            failureThreshold: 180
          readinessProbe:
            httpGet:
              path: /v1/models
              port: http-openai
            initialDelaySeconds: 20
            periodSeconds: 10
          livenessProbe:
            httpGet:
              # NIM's documented liveness endpoint.
              path: /v1/health/live
              port: http-openai
            initialDelaySeconds: 60
            periodSeconds: 30
|
||||
|
||||
---
|
||||
# In-cluster (ClusterIP) Service that routes port 8000 to port 8000 of the
# pods labelled app=llama-nano-nim.
apiVersion: v1
kind: Service
metadata:
  name: llama-nano-nim
spec:
  type: ClusterIP
  ports:
    - name: http-openai
      targetPort: 8000
      port: 8000
  selector:
    app: llama-nano-nim
|
Loading…
Add table
Add a link
Reference in a new issue