mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 01:01:13 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			78 lines
		
	
	
	
		
			2.1 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
			
		
		
	
	
			78 lines
		
	
	
	
		
			2.1 KiB
		
	
	
	
		
			Text
		
	
	
	
	
	
# -------------------------------------------------
# NVIDIA NIM - Code
# -------------------------------------------------
# Single-replica Deployment running the StarCoder2-7B NIM container.
# The container listens on port 8000 (named http-openai); the pod template
# advertises /v1/metrics on that port through the prometheus.io annotations.

apiVersion: apps/v1
kind: Deployment
metadata:
  name: llm-nim-code
  labels:
    app: llm-nim-code
spec:
  replicas: 1
  selector:
    matchLabels:
      app: llm-nim-code
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: llm-nim-code
        nim-type: llama-nim
      annotations:
        # Annotation values are strings, hence the quoting of 'true' / '8000'.
        prometheus.io/scrape: 'true'
        prometheus.io/port: '8000'
        prometheus.io/path: '/v1/metrics'
    spec:
      imagePullSecrets:
        # docker-registry secret: nvcr.io / $oauthtoken / <NGC_DOCKER_API_KEY>
        - name: ngc-docker-registry
      volumes:
        - name: model-cache
          emptyDir:
            # tmpfs; omit or use "" to back by node disk
            medium: Memory
            # NOTE(review): this limit was originally annotated for a 4B model,
            # but the image below is starcoder2-7b -- confirm 12Gi is still
            # sufficient and adjust if needed.
            sizeLimit: 12Gi
      containers:
        - name: nim
          image: nvcr.io/nim/bigcode/starcoder2-7b:1.8.1
          ports:
            - name: http-openai
              containerPort: 8000
          resources:
            limits:
              nvidia.com/gpu: 1
          env:
            # NOTE(review): the image is published under bigcode/, while this
            # name uses the nvidia/ prefix -- confirm this is intended.
            - name: NIM_MODEL_NAME
              value: "nvidia/starcoder2-7b"
            - name: NGC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ngc-api
                  key: NGC_API_KEY
            # NVIDIA_VISIBLE_DEVICES is deliberately NOT set here. The
            # nvidia.com/gpu limit above lets the device plugin pick the GPU;
            # hard-coding "0" would override that allocation and pin every pod
            # to the first GPU on a multi-GPU node.
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,utility"
            - name: ENABLE_GPU_METRICS
              value: "true"
          volumeMounts:
            # NOTE(review): confirm the NIM container actually uses /models as
            # its cache directory (NIM reads the location from NIM_CACHE_PATH);
            # otherwise this volume is mounted but unused.
            - name: model-cache
              mountPath: /models
          readinessProbe:
            httpGet:
              path: /v1/models
              port: 8000
            # First probe after 100 s, then one probe every 100 s.
            initialDelaySeconds: 100
            periodSeconds: 100

---
# ClusterIP Service that forwards in-cluster traffic on port 8000 to port 8000
# of the pods labelled app=llm-nim-code.
apiVersion: v1
kind: Service
metadata:
  name: llm-nim-code
spec:
  type: ClusterIP
  selector:
    app: llm-nim-code
  ports:
    - name: http-openai
      targetPort: 8000
      port: 8000