mirror of
				https://github.com/meta-llama/llama-stack.git
				synced 2025-10-25 17:11:12 +00:00 
			
		
		
		
	Test plan: python tests/verifications/generate_report.py --providers fireworks,together,llama_meta_ref,openai Co-authored-by: Eric Huang <erichuang@fb.com>
		
			
				
	
	
		
			8 lines
		
	
	
	
		
			392 B
		
	
	
	
		
			YAML
		
	
	
	
	
	
			
		
		
	
	
			8 lines
		
	
	
	
		
			392 B
		
	
	
	
		
			YAML
		
	
	
	
	
	
| # Start the local server this config targets (port 5002 matches base_url below) with: LLAMA_STACK_PORT=5002 llama stack run meta-reference-gpu --env INFERENCE_MODEL=meta-llama/Llama-4-Scout-17B-16E-Instruct --env INFERENCE_CHECKPOINT_DIR=<path_to_ckpt>
 | |
| base_url: http://localhost:5002/v1/openai/v1
 | |
| api_key_var: foo
 | |
| models:
 | |
| - meta-llama/Llama-4-Scout-17B-16E-Instruct
 | |
| model_display_names:
 | |
|   meta-llama/Llama-4-Scout-17B-16E-Instruct: Llama-4-Scout-Instruct
 | |
| test_exclusions: {}
 |