# Mirror of https://github.com/meta-llama/llama-stack.git
# Synced 2025-10-23 08:33:09 +00:00 (listing metadata: 73 lines, 1.9 KiB, Text)
# -------------------------------------------------
# NVIDIA NIM - Code
# -------------------------------------------------
# Deployment running the NVIDIA NIM container for the StarCoder2-7B code
# model, serving an OpenAI-compatible API on port 8000.
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-nim-code
  labels:
    app: llm-nim-code
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-nim-code
  template:
    metadata:
      labels:
        app: llm-nim-code
    spec:
      imagePullSecrets:
        # docker-registry secret: nvcr.io / $oauthtoken / <NGC_DOCKER_API_KEY>
        - name: ngc-docker-registry
      volumes:
        - name: model-cache
          emptyDir:
            medium: Memory  # tmpfs; omit or use "" to back by node disk
            # NOTE(review): original comment said "fits the 4 B model + tensors"
            # but the image below is starcoder2-7b — confirm 12Gi is sufficient
            # for the 7 B weights; adjust if needed.
            sizeLimit: 12Gi
      containers:
        - name: nim
          image: nvcr.io/nim/bigcode/starcoder2-7b:1.8.1
          ports:
            - name: http-openai
              containerPort: 8000
          resources:
            limits:
              nvidia.com/gpu: 1
          env:
            - name: NIM_MODEL_NAME
              value: "nvidia/starcoder2-7b"
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api
                  key: NGC_API_KEY
          volumeMounts:
            - name: model-cache
              mountPath: /models  # default NIM cache path
          # Model download + load is slow; probes allow several minutes
          # before first check.
          readinessProbe:
            httpGet:
              path: /v1/models
              port: http-openai
            initialDelaySeconds: 360
            periodSeconds: 360
          livenessProbe:
            httpGet:
              path: /v1/health
              port: http-openai
            initialDelaySeconds: 600
            periodSeconds: 360
|
---
# ClusterIP Service exposing the NIM container's OpenAI-compatible
# endpoint (port 8000) inside the cluster.
apiVersion: v1
kind: Service
metadata:
  name: llm-nim-code
spec:
  selector:
    app: llm-nim-code
  ports:
    - name: http-openai
      port: 8000
      targetPort: 8000
  type: ClusterIP