---
# Llama Stack run configuration for performance testing against a vLLM backend.
# All ${env.VAR:=default} expressions are resolved by Llama Stack at startup,
# falling back to the default after ':=' when the env var is unset.
version: '2'
image_name: perf-test-demo

# Only the inference API is enabled for this perf-test stack.
apis:
  - inference

providers:
  inference:
    - provider_id: vllm-inference
      provider_type: remote::vllm
      config:
        # OpenAI-compatible vLLM endpoint; defaults to a local server.
        url: ${env.VLLM_URL:=http://localhost:8001/v1}
        max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
        # Placeholder token — local vLLM does not require authentication.
        api_token: ${env.VLLM_API_TOKEN:=fake}
        # TLS verification off by default; set VLLM_TLS_VERIFY=true for
        # endpoints with valid certificates.
        tls_verify: ${env.VLLM_TLS_VERIFY:=false}

models:
  # Model to serve must be supplied via INFERENCE_MODEL (no default).
  - model_id: ${env.INFERENCE_MODEL}
    provider_id: vllm-inference
    model_type: llm

server:
  port: 8322