# -------------------------------------------------
# NVIDIA NIM - Code
# -------------------------------------------------
---
# Deployment: one replica of the StarCoder2-7B NIM container. It requests a
# single NVIDIA GPU, listens on container port 8000 (named "http-openai"),
# and carries prometheus.io/* annotations pointing at /v1/metrics on 8000.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-nim-code
  labels:
    app: llm-nim-code
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-nim-code
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels and the Service selector.
        app: llm-nim-code
        nim-type: llama-nim
      annotations:
        # Annotation values must be strings, so numbers/booleans are quoted.
        prometheus.io/scrape: 'true'
        prometheus.io/port: '8000'
        prometheus.io/path: '/v1/metrics'
    spec:
      imagePullSecrets:
        # docker-registry secret for nvcr.io with username $oauthtoken
        # (password presumably the NGC API key - confirm).
        - name: ngc-docker-registry
      volumes:
        - name: model-cache
          emptyDir:
            # tmpfs; omit or use "" to back by node disk
            medium: Memory
            # NOTE(review): the previous comment sized this for a "4 B" model,
            # but the image below is starcoder2-7b - confirm 12Gi is enough
            # for whatever is written to this volume; adjust if needed.
            sizeLimit: 12Gi
      containers:
        - name: nim
          image: nvcr.io/nim/bigcode/starcoder2-7b:1.8.1
          ports:
            - name: http-openai
              containerPort: 8000
          resources:
            limits:
              nvidia.com/gpu: 1
          env:
            # NOTE(review): the image lives under "bigcode/" while this name
            # uses the "nvidia/" prefix - confirm this is the intended value.
            - name: NIM_MODEL_NAME
              value: 'nvidia/starcoder2-7b'
            # API key is read from the "ngc-api" Secret, never inlined here.
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api
                  key: NGC_API_KEY
            # NOTE(review): "all" together with a 1-GPU limit may expose more
            # devices than were allocated - confirm against the device-plugin
            # configuration used on this cluster.
            - name: NVIDIA_VISIBLE_DEVICES
              value: 'all'
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: 'compute,utility'
            - name: ENABLE_GPU_METRICS
              value: 'true'
          volumeMounts:
            # NOTE(review): previously described as the "default NIM cache
            # path". NIM_CACHE_PATH is not set in this manifest, so confirm
            # the container actually writes its model cache to /models.
            - name: model-cache
              mountPath: /models
          readinessProbe:
            # Pod becomes Ready once GET /v1/models succeeds. The first check
            # runs after 100 s and is repeated every 100 s.
            httpGet:
              path: /v1/models
              # Refers to the named container port declared above (8000).
              port: http-openai
            initialDelaySeconds: 100
            periodSeconds: 100
---
# Service: cluster-internal (ClusterIP) endpoint on port 8000 that forwards
# to the "http-openai" port of pods labelled app=llm-nim-code.
apiVersion: v1
kind: Service
metadata:
  name: llm-nim-code
  labels:
    app: llm-nim-code
spec:
  type: ClusterIP
  selector:
    app: llm-nim-code
  ports:
    - name: http-openai
      port: 8000
      # Resolved by name against the container's declared port (8000).
      targetPort: http-openai