# forked from phoenix-oss/llama-stack-mirror
# Build specification for the `vllm-gpu` distribution.
# The schema version is kept as a quoted string so it is not parsed as an integer.
version: '2'
name: vllm-gpu
distribution_spec:
  description: Use a built-in vLLM engine for running LLM inference
  # No image is specified here (explicit null rather than an empty value).
  docker_image: null
  # Each key under `providers` lists provider identifiers of the form
  # `inline::<name>` or `remote::<name>`.
  # NOTE(review): presumably one list per API of the stack — confirm against
  # the consumer's schema.
  providers:
    inference:
    - inline::vllm
    memory:
    - inline::faiss
    - remote::chromadb
    - remote::pgvector
    safety:
    - inline::llama-guard
    agents:
    - inline::meta-reference
    telemetry:
    - inline::meta-reference
# NOTE(review): presumably selects the kind of environment/image the build
# produces — confirm the accepted values with the build tooling.
image_type: conda