mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-26 01:12:59 +00:00 
			
		
		
		
	- Add setup-vllm GitHub action to start VLLM container
	- Extend integration test matrix to support both ollama and vllm providers
	- Make test setup conditional based on provider type
	- Add provider-specific environment variables and configurations
	- vllm tests are set up to run weekly or can be triggered manually (only ollama on PR)

	TODO: investigate failing tests for vllm provider (safety and post_training).
	Also need a proper fix for #2713 (tmp fix for this in the first commit in this PR).

	Closes: #1648

	---------

	Signed-off-by: Derek Higgins <derekh@redhat.com>
		
			
				
	
	
		
			27 lines
		
	
	
	
		
			805 B
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			27 lines
		
	
	
	
		
			805 B
		
	
	
	
		
			YAML
		
	
	
	
	
	
# Composite action: starts a vLLM server in a detached Docker container named
# "vllm", publishes it on localhost:8000, and blocks until its /health
# endpoint answers (or fails the step after 15 minutes).
name: Setup VLLM
description: Start VLLM
runs:
  using: "composite"
  steps:
    - name: Start VLLM
      shell: bash
      run: |
        # Start vllm container (detached). Everything after the image name is
        # passed to the container as its arguments.
        # NOTE(review): the --model path points inside the container, so the
        # weights are presumably baked into the image -- confirm.
        docker run -d \
          --name vllm \
          -p 8000:8000 \
          --privileged=true \
          quay.io/higginsd/vllm-cpu:65393ee064 \
          --host 0.0.0.0 \
          --port 8000 \
          --enable-auto-tool-choice \
          --tool-call-parser llama3_json \
          --model /root/.cache/Llama-3.2-1B-Instruct \
          --served-model-name meta-llama/Llama-3.2-1B-Instruct

        # Wait for vllm to be ready: poll /health every 5 seconds for up to
        # 900 seconds. The loop aborts early if the container is no longer
        # running, so a crash at startup does not burn the full timeout.
        # The "{{...}}" below is a docker Go template, not a GitHub
        # expression (those require a leading "$").
        echo "Waiting for vllm to be ready..."
        if ! timeout 900 bash -c 'until curl -fsS -o /dev/null http://localhost:8000/health; do
          if [ "$(docker inspect -f "{{.State.Running}}" vllm 2>/dev/null)" != "true" ]; then
            echo "vllm container is not running"
            exit 1
          fi
          echo "Waiting for vllm..."
          sleep 5
        done'; then
          # Surface the container output so the failure can be diagnosed
          # from the CI log, then fail the step.
          echo "vllm failed to become ready; container logs follow:"
          docker logs vllm || true
          exit 1
        fi